-
Notifications
You must be signed in to change notification settings - Fork 233
/
test_df_cols.py
754 lines (752 loc) · 244 KB
/
test_df_cols.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
from pyspark.sql.types import *
import optimus.helpers.functions_spark
from optimus import Optimus
from optimus.helpers.json import json_enconding
from optimus.helpers.functions import deep_sort
import unittest
from pyspark.ml.linalg import Vectors, VectorUDT, DenseVector
import numpy as np
nan = np.nan
import datetime
from pyspark.sql import functions as F
op = Optimus(master='local')
source_df=op.create.df([('names', StringType(), True),('height(ft)', ShortType(), True),('function', StringType(), True),('rank', ByteType(), True),('age', IntegerType(), True),('weight(t)', FloatType(), True),('japanese name', ArrayType(StringType(),True), True),('last position seen', StringType(), True),('date arrival', StringType(), True),('last date seen', StringType(), True),('attributes', ArrayType(FloatType(),True), True),('Date Type', DateType(), True),('timestamp', TimestampType(), True),('Cybertronian', BooleanType(), True),('function(binary)', BinaryType(), True),('NullType', NullType(), True)], [("Optim'us", -28, 'Leader', 10, 5000000, 4.300000190734863, ['Inochi', 'Convoy'], '19.442735,-99.201111', '1980/04/10', '2016/09/10', [8.53439998626709, 4300.0], datetime.date(2016, 9, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Leader'), None), ('bumbl#ebéé ', 17, 'Espionage', 7, 5000000, 2.0, ['Bumble', 'Goldback'], '10.642707,-71.612534', '1980/04/10', '2015/08/10', [5.334000110626221, 2000.0], datetime.date(2015, 8, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Espionage'), None), ('ironhide&', 26, 'Security', 7, 5000000, 4.0, ['Roadbuster'], '37.789563,-122.400356', '1980/04/10', '2014/07/10', [7.924799919128418, 4000.0], datetime.date(2014, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Security'), None), ('Jazz', 13, 'First Lieutenant', 8, 5000000, 1.7999999523162842, ['Meister'], '33.670666,-117.841553', '1980/04/10', '2013/06/10', [3.962399959564209, 1800.0], datetime.date(2013, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'First Lieutenant'), None), ('Megatron', None, 'None', 10, 5000000, 5.699999809265137, ['Megatron'], None, '1980/04/10', '2012/05/10', [None, 5700.0], datetime.date(2012, 5, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'None'), None), ('Metroplex_)^$', 300, 'Battle Station', 8, 5000000, None, ['Metroflex'], None, '1980/04/10', '2011/04/10', [91.44000244140625, None], datetime.date(2011, 4, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Battle Station'), None), (None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None)])
class Test_df_cols(unittest.TestCase):
maxDiff = None
@staticmethod
def test_cols_abs():
actual_df =source_df.cols.abs('height(ft)')
expected_df = op.create.df([('names', StringType(), True),('height(ft)', ShortType(), True),('function', StringType(), True),('rank', ByteType(), True),('age', IntegerType(), True),('weight(t)', FloatType(), True),('japanese name', ArrayType(StringType(),True), True),('last position seen', StringType(), True),('date arrival', StringType(), True),('last date seen', StringType(), True),('attributes', ArrayType(FloatType(),True), True),('Date Type', DateType(), True),('timestamp', TimestampType(), True),('Cybertronian', BooleanType(), True),('function(binary)', BinaryType(), True),('NullType', NullType(), True)], [("Optim'us", 28, 'Leader', 10, 5000000, 4.300000190734863, ['Inochi', 'Convoy'], '19.442735,-99.201111', '1980/04/10', '2016/09/10', [8.53439998626709, 4300.0], datetime.date(2016, 9, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Leader'), None), ('bumbl#ebéé ', 17, 'Espionage', 7, 5000000, 2.0, ['Bumble', 'Goldback'], '10.642707,-71.612534', '1980/04/10', '2015/08/10', [5.334000110626221, 2000.0], datetime.date(2015, 8, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Espionage'), None), ('ironhide&', 26, 'Security', 7, 5000000, 4.0, ['Roadbuster'], '37.789563,-122.400356', '1980/04/10', '2014/07/10', [7.924799919128418, 4000.0], datetime.date(2014, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Security'), None), ('Jazz', 13, 'First Lieutenant', 8, 5000000, 1.7999999523162842, ['Meister'], '33.670666,-117.841553', '1980/04/10', '2013/06/10', [3.962399959564209, 1800.0], datetime.date(2013, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'First Lieutenant'), None), ('Megatron', None, 'None', 10, 5000000, 5.699999809265137, ['Megatron'], None, '1980/04/10', '2012/05/10', [None, 5700.0], datetime.date(2012, 5, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'None'), None), ('Metroplex_)^$', 300, 'Battle Station', 8, 5000000, None, ['Metroflex'], None, '1980/04/10', '2011/04/10', [91.44000244140625, None], datetime.date(2011, 4, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Battle Station'), None), (None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None)])
assert (expected_df.collect() == actual_df.collect())
@staticmethod
def test_cols_abs_all_columns():
actual_df =source_df.cols.abs('*')
expected_df = op.create.df([('names', StringType(), True),('height(ft)', ShortType(), True),('function', StringType(), True),('rank', ByteType(), True),('age', IntegerType(), True),('weight(t)', FloatType(), True),('japanese name', ArrayType(StringType(),True), True),('last position seen', StringType(), True),('date arrival', StringType(), True),('last date seen', StringType(), True),('attributes', ArrayType(FloatType(),True), True),('Date Type', DateType(), True),('timestamp', TimestampType(), True),('Cybertronian', BooleanType(), True),('function(binary)', BinaryType(), True),('NullType', NullType(), True)], [("Optim'us", 28, 'Leader', 10, 5000000, 4.300000190734863, ['Inochi', 'Convoy'], '19.442735,-99.201111', '1980/04/10', '2016/09/10', [8.53439998626709, 4300.0], datetime.date(2016, 9, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Leader'), None), ('bumbl#ebéé ', 17, 'Espionage', 7, 5000000, 2.0, ['Bumble', 'Goldback'], '10.642707,-71.612534', '1980/04/10', '2015/08/10', [5.334000110626221, 2000.0], datetime.date(2015, 8, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Espionage'), None), ('ironhide&', 26, 'Security', 7, 5000000, 4.0, ['Roadbuster'], '37.789563,-122.400356', '1980/04/10', '2014/07/10', [7.924799919128418, 4000.0], datetime.date(2014, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Security'), None), ('Jazz', 13, 'First Lieutenant', 8, 5000000, 1.7999999523162842, ['Meister'], '33.670666,-117.841553', '1980/04/10', '2013/06/10', [3.962399959564209, 1800.0], datetime.date(2013, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'First Lieutenant'), None), ('Megatron', None, 'None', 10, 5000000, 5.699999809265137, ['Megatron'], None, '1980/04/10', '2012/05/10', [None, 5700.0], datetime.date(2012, 5, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'None'), None), ('Metroplex_)^$', 300, 'Battle Station', 8, 5000000, None, ['Metroflex'], None, '1980/04/10', '2011/04/10', [91.44000244140625, None], datetime.date(2011, 4, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Battle Station'), None), (None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None)])
assert (expected_df.collect() == actual_df.collect())
@staticmethod
def test_cols_add():
actual_df =source_df.cols.add(['height(ft)','rank'])
expected_df = op.create.df([('names', StringType(), True),('height(ft)', FloatType(), True),('function', StringType(), True),('rank', FloatType(), True),('age', IntegerType(), True),('weight(t)', FloatType(), True),('japanese name', ArrayType(StringType(),True), True),('last position seen', StringType(), True),('date arrival', StringType(), True),('last date seen', StringType(), True),('attributes', ArrayType(FloatType(),True), True),('Date Type', DateType(), True),('timestamp', TimestampType(), True),('Cybertronian', BooleanType(), True),('function(binary)', BinaryType(), True),('NullType', NullType(), True),('sum', FloatType(), True)], [("Optim'us", -28.0, 'Leader', 10.0, 5000000, 4.300000190734863, ['Inochi', 'Convoy'], '19.442735,-99.201111', '1980/04/10', '2016/09/10', [8.53439998626709, 4300.0], datetime.date(2016, 9, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Leader'), None, -18.0), ('bumbl#ebéé ', 17.0, 'Espionage', 7.0, 5000000, 2.0, ['Bumble', 'Goldback'], '10.642707,-71.612534', '1980/04/10', '2015/08/10', [5.334000110626221, 2000.0], datetime.date(2015, 8, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Espionage'), None, 24.0), ('ironhide&', 26.0, 'Security', 7.0, 5000000, 4.0, ['Roadbuster'], '37.789563,-122.400356', '1980/04/10', '2014/07/10', [7.924799919128418, 4000.0], datetime.date(2014, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Security'), None, 33.0), ('Jazz', 13.0, 'First Lieutenant', 8.0, 5000000, 1.7999999523162842, ['Meister'], '33.670666,-117.841553', '1980/04/10', '2013/06/10', [3.962399959564209, 1800.0], datetime.date(2013, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'First Lieutenant'), None, 21.0), ('Megatron', None, 'None', 10.0, 5000000, 5.699999809265137, ['Megatron'], None, '1980/04/10', '2012/05/10', [None, 5700.0], datetime.date(2012, 5, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'None'), None, None), ('Metroplex_)^$', 300.0, 'Battle Station', 8.0, 5000000, None, ['Metroflex'], None, '1980/04/10', '2011/04/10', [91.44000244140625, None], datetime.date(2011, 4, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Battle Station'), None, 308.0), (None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None)])
assert (expected_df.collect() == actual_df.collect())
@staticmethod
def test_cols_add_all_columns():
actual_df =source_df.cols.add('*')
expected_df = op.create.df([('names', StringType(), True),('height(ft)', FloatType(), True),('function', StringType(), True),('rank', FloatType(), True),('age', FloatType(), True),('weight(t)', FloatType(), True),('japanese name', ArrayType(StringType(),True), True),('last position seen', StringType(), True),('date arrival', StringType(), True),('last date seen', StringType(), True),('attributes', ArrayType(FloatType(),True), True),('Date Type', DateType(), True),('timestamp', TimestampType(), True),('Cybertronian', BooleanType(), True),('function(binary)', BinaryType(), True),('NullType', NullType(), True),('sum', FloatType(), True)], [("Optim'us", -28.0, 'Leader', 10.0, 5000000.0, 4.300000190734863, ['Inochi', 'Convoy'], '19.442735,-99.201111', '1980/04/10', '2016/09/10', [8.53439998626709, 4300.0], datetime.date(2016, 9, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Leader'), None, 4999986.5), ('bumbl#ebéé ', 17.0, 'Espionage', 7.0, 5000000.0, 2.0, ['Bumble', 'Goldback'], '10.642707,-71.612534', '1980/04/10', '2015/08/10', [5.334000110626221, 2000.0], datetime.date(2015, 8, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Espionage'), None, 5000026.0), ('ironhide&', 26.0, 'Security', 7.0, 5000000.0, 4.0, ['Roadbuster'], '37.789563,-122.400356', '1980/04/10', '2014/07/10', [7.924799919128418, 4000.0], datetime.date(2014, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Security'), None, 5000037.0), ('Jazz', 13.0, 'First Lieutenant', 8.0, 5000000.0, 1.7999999523162842, ['Meister'], '33.670666,-117.841553', '1980/04/10', '2013/06/10', [3.962399959564209, 1800.0], datetime.date(2013, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'First Lieutenant'), None, 5000023.0), ('Megatron', None, 'None', 10.0, 5000000.0, 5.699999809265137, ['Megatron'], None, '1980/04/10', '2012/05/10', [None, 5700.0], datetime.date(2012, 5, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'None'), None, None), ('Metroplex_)^$', 300.0, 'Battle Station', 8.0, 5000000.0, None, ['Metroflex'], None, '1980/04/10', '2011/04/10', [91.44000244140625, None], datetime.date(2011, 4, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Battle Station'), None, None), (None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None)])
assert (expected_df.collect() == actual_df.collect())
@staticmethod
def test_cols_append_number():
actual_df = optimus.helpers.functions_spark.append('new col', 1)
expected_df = op.create.df([('names', StringType(), True),('height(ft)', ShortType(), True),('function', StringType(), True),('rank', ByteType(), True),('age', IntegerType(), True),('weight(t)', FloatType(), True),('japanese name', ArrayType(StringType(),True), True),('last position seen', StringType(), True),('date arrival', StringType(), True),('last date seen', StringType(), True),('attributes', ArrayType(FloatType(),True), True),('Date Type', DateType(), True),('timestamp', TimestampType(), True),('Cybertronian', BooleanType(), True),('function(binary)', BinaryType(), True),('NullType', NullType(), True),('new col', IntegerType(), False)], [("Optim'us", -28, 'Leader', 10, 5000000, 4.300000190734863, ['Inochi', 'Convoy'], '19.442735,-99.201111', '1980/04/10', '2016/09/10', [8.53439998626709, 4300.0], datetime.date(2016, 9, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Leader'), None, 1), ('bumbl#ebéé ', 17, 'Espionage', 7, 5000000, 2.0, ['Bumble', 'Goldback'], '10.642707,-71.612534', '1980/04/10', '2015/08/10', [5.334000110626221, 2000.0], datetime.date(2015, 8, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Espionage'), None, 1), ('ironhide&', 26, 'Security', 7, 5000000, 4.0, ['Roadbuster'], '37.789563,-122.400356', '1980/04/10', '2014/07/10', [7.924799919128418, 4000.0], datetime.date(2014, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Security'), None, 1), ('Jazz', 13, 'First Lieutenant', 8, 5000000, 1.7999999523162842, ['Meister'], '33.670666,-117.841553', '1980/04/10', '2013/06/10', [3.962399959564209, 1800.0], datetime.date(2013, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'First Lieutenant'), None, 1), ('Megatron', None, 'None', 10, 5000000, 5.699999809265137, ['Megatron'], None, '1980/04/10', '2012/05/10', [None, 5700.0], datetime.date(2012, 5, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'None'), None, 1), ('Metroplex_)^$', 300, 'Battle Station', 8, 5000000, None, ['Metroflex'], None, '1980/04/10', '2011/04/10', [91.44000244140625, None], datetime.date(2011, 4, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Battle Station'), None, 1), (None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, 1)])
assert (expected_df.collect() == actual_df.collect())
@staticmethod
def test_cols_cast():
actual_df =source_df.cols.cast('function','string')
expected_df = op.create.df([('names', StringType(), True),('height(ft)', ShortType(), True),('function', StringType(), True),('rank', ByteType(), True),('age', IntegerType(), True),('weight(t)', FloatType(), True),('japanese name', ArrayType(StringType(),True), True),('last position seen', StringType(), True),('date arrival', StringType(), True),('last date seen', StringType(), True),('attributes', ArrayType(FloatType(),True), True),('Date Type', DateType(), True),('timestamp', TimestampType(), True),('Cybertronian', BooleanType(), True),('function(binary)', BinaryType(), True),('NullType', NullType(), True)], [("Optim'us", -28, 'Leader', 10, 5000000, 4.300000190734863, ['Inochi', 'Convoy'], '19.442735,-99.201111', '1980/04/10', '2016/09/10', [8.53439998626709, 4300.0], datetime.date(2016, 9, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Leader'), None), ('bumbl#ebéé ', 17, 'Espionage', 7, 5000000, 2.0, ['Bumble', 'Goldback'], '10.642707,-71.612534', '1980/04/10', '2015/08/10', [5.334000110626221, 2000.0], datetime.date(2015, 8, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Espionage'), None), ('ironhide&', 26, 'Security', 7, 5000000, 4.0, ['Roadbuster'], '37.789563,-122.400356', '1980/04/10', '2014/07/10', [7.924799919128418, 4000.0], datetime.date(2014, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Security'), None), ('Jazz', 13, 'First Lieutenant', 8, 5000000, 1.7999999523162842, ['Meister'], '33.670666,-117.841553', '1980/04/10', '2013/06/10', [3.962399959564209, 1800.0], datetime.date(2013, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'First Lieutenant'), None), ('Megatron', None, 'None', 10, 5000000, 5.699999809265137, ['Megatron'], None, '1980/04/10', '2012/05/10', [None, 5700.0], datetime.date(2012, 5, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'None'), None), ('Metroplex_)^$', 300, 'Battle Station', 8, 5000000, None, ['Metroflex'], None, '1980/04/10', '2011/04/10', [91.44000244140625, None], datetime.date(2011, 4, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Battle Station'), None), (None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None)])
assert (expected_df.collect() == actual_df.collect())
@staticmethod
def test_cols_cast_all_columns():
actual_df =source_df.cols.cast('*','string')
expected_df = op.create.df([('names', StringType(), True),('height(ft)', StringType(), True),('function', StringType(), True),('rank', StringType(), True),('age', StringType(), True),('weight(t)', StringType(), True),('japanese name', StringType(), True),('last position seen', StringType(), True),('date arrival', StringType(), True),('last date seen', StringType(), True),('attributes', StringType(), True),('Date Type', StringType(), True),('timestamp', StringType(), True),('Cybertronian', StringType(), True),('function(binary)', StringType(), True),('NullType', StringType(), True)], [("Optim'us", '-28', 'Leader', '10', '5000000', '4.3', '[Inochi, Convoy]', '19.442735,-99.201111', '1980/04/10', '2016/09/10', '[8.5344, 4300.0]', '2016-09-10', '2014-06-24 00:00:00', 'true', 'Leader', None), ('bumbl#ebéé ', '17', 'Espionage', '7', '5000000', '2.0', '[Bumble, Goldback]', '10.642707,-71.612534', '1980/04/10', '2015/08/10', '[5.334, 2000.0]', '2015-08-10', '2014-06-24 00:00:00', 'true', 'Espionage', None), ('ironhide&', '26', 'Security', '7', '5000000', '4.0', '[Roadbuster]', '37.789563,-122.400356', '1980/04/10', '2014/07/10', '[7.9248, 4000.0]', '2014-06-24', '2014-06-24 00:00:00', 'true', 'Security', None), ('Jazz', '13', 'First Lieutenant', '8', '5000000', '1.8', '[Meister]', '33.670666,-117.841553', '1980/04/10', '2013/06/10', '[3.9624, 1800.0]', '2013-06-24', '2014-06-24 00:00:00', 'true', 'First Lieutenant', None), ('Megatron', None, 'None', '10', '5000000', '5.7', '[Megatron]', None, '1980/04/10', '2012/05/10', '[, 5700.0]', '2012-05-10', '2014-06-24 00:00:00', 'true', 'None', None), ('Metroplex_)^$', '300', 'Battle Station', '8', '5000000', None, '[Metroflex]', None, '1980/04/10', '2011/04/10', '[91.44,]', '2011-04-10', '2014-06-24 00:00:00', 'true', 'Battle Station', None), (None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None)])
assert (expected_df.collect() == actual_df.collect())
@staticmethod
def test_cols_clip():
actual_df =source_df.cols.clip('rank',3,5)
expected_df = op.create.df([('names', StringType(), True),('height(ft)', ShortType(), True),('function', StringType(), True),('rank', IntegerType(), True),('age', IntegerType(), True),('weight(t)', FloatType(), True),('japanese name', ArrayType(StringType(),True), True),('last position seen', StringType(), True),('date arrival', StringType(), True),('last date seen', StringType(), True),('attributes', ArrayType(FloatType(),True), True),('Date Type', DateType(), True),('timestamp', TimestampType(), True),('Cybertronian', BooleanType(), True),('function(binary)', BinaryType(), True),('NullType', NullType(), True)], [("Optim'us", -28, 'Leader', 5, 5000000, 4.300000190734863, ['Inochi', 'Convoy'], '19.442735,-99.201111', '1980/04/10', '2016/09/10', [8.53439998626709, 4300.0], datetime.date(2016, 9, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Leader'), None), ('bumbl#ebéé ', 17, 'Espionage', 5, 5000000, 2.0, ['Bumble', 'Goldback'], '10.642707,-71.612534', '1980/04/10', '2015/08/10', [5.334000110626221, 2000.0], datetime.date(2015, 8, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Espionage'), None), ('ironhide&', 26, 'Security', 5, 5000000, 4.0, ['Roadbuster'], '37.789563,-122.400356', '1980/04/10', '2014/07/10', [7.924799919128418, 4000.0], datetime.date(2014, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Security'), None), ('Jazz', 13, 'First Lieutenant', 5, 5000000, 1.7999999523162842, ['Meister'], '33.670666,-117.841553', '1980/04/10', '2013/06/10', [3.962399959564209, 1800.0], datetime.date(2013, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'First Lieutenant'), None), ('Megatron', None, 'None', 5, 5000000, 5.699999809265137, ['Megatron'], None, '1980/04/10', '2012/05/10', [None, 5700.0], datetime.date(2012, 5, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'None'), None), ('Metroplex_)^$', 300, 'Battle Station', 5, 5000000, None, ['Metroflex'], None, '1980/04/10', '2011/04/10', [91.44000244140625, None], datetime.date(2011, 4, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Battle Station'), None), (None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None)])
assert (expected_df.collect() == actual_df.collect())
@staticmethod
def test_cols_clip_all_columns():
actual_df =source_df.cols.clip('*',3,5)
expected_df = op.create.df([('names', StringType(), True),('height(ft)', IntegerType(), True),('function', StringType(), True),('rank', IntegerType(), True),('age', IntegerType(), True),('weight(t)', FloatType(), True),('japanese name', ArrayType(StringType(),True), True),('last position seen', StringType(), True),('date arrival', StringType(), True),('last date seen', StringType(), True),('attributes', ArrayType(FloatType(),True), True),('Date Type', DateType(), True),('timestamp', TimestampType(), True),('Cybertronian', BooleanType(), True),('function(binary)', BinaryType(), True),('NullType', NullType(), True)], [("Optim'us", 3, 'Leader', 5, 5, 4.300000190734863, ['Inochi', 'Convoy'], '19.442735,-99.201111', '1980/04/10', '2016/09/10', [8.53439998626709, 4300.0], datetime.date(2016, 9, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Leader'), None), ('bumbl#ebéé ', 5, 'Espionage', 5, 5, 3.0, ['Bumble', 'Goldback'], '10.642707,-71.612534', '1980/04/10', '2015/08/10', [5.334000110626221, 2000.0], datetime.date(2015, 8, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Espionage'), None), ('ironhide&', 5, 'Security', 5, 5, 4.0, ['Roadbuster'], '37.789563,-122.400356', '1980/04/10', '2014/07/10', [7.924799919128418, 4000.0], datetime.date(2014, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Security'), None), ('Jazz', 5, 'First Lieutenant', 5, 5, 3.0, ['Meister'], '33.670666,-117.841553', '1980/04/10', '2013/06/10', [3.962399959564209, 1800.0], datetime.date(2013, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'First Lieutenant'), None), ('Megatron', None, 'None', 5, 5, 5.0, ['Megatron'], None, '1980/04/10', '2012/05/10', [None, 5700.0], datetime.date(2012, 5, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'None'), None), ('Metroplex_)^$', 5, 'Battle Station', 5, 5, None, ['Metroflex'], None, '1980/04/10', '2011/04/10', [91.44000244140625, None], datetime.date(2011, 4, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Battle Station'), None), (None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None)])
assert (expected_df.collect() == actual_df.collect())
@staticmethod
def test_cols_count():
actual_df =source_df.cols.count()
actual_df =json_enconding(actual_df)
expected_value =json_enconding(16)
assert (expected_value == actual_df)
def test_cols_count_by_dtypes(self):
actual_df =source_df.cols.count_by_dtypes('*',infer=False)
expected_value ={'names': {'int': 0, 'decimal': 0, 'string': 6, 'date': 0, 'boolean': 0, 'binary': 0, 'array': 0, 'object': 0, 'null': 1, 'missing': 0}, 'height(ft)': {'int': 5, 'decimal': 0, 'string': 0, 'date': 0, 'boolean': 0, 'binary': 0, 'array': 0, 'object': 0, 'null': 2, 'missing': 0}, 'function': {'int': 0, 'decimal': 0, 'string': 6, 'date': 0, 'boolean': 0, 'binary': 0, 'array': 0, 'object': 0, 'null': 1, 'missing': 0}, 'rank': {'int': 6, 'decimal': 0, 'string': 0, 'date': 0, 'boolean': 0, 'binary': 0, 'array': 0, 'object': 0, 'null': 1, 'missing': 0}, 'age': {'int': 6, 'decimal': 0, 'string': 0, 'date': 0, 'boolean': 0, 'binary': 0, 'array': 0, 'object': 0, 'null': 1, 'missing': 0}, 'weight(t)': {'int': 0, 'decimal': 5, 'string': 0, 'date': 0, 'boolean': 0, 'binary': 0, 'array': 0, 'object': 0, 'null': 2, 'missing': 0}, 'japanese name': {'int': 0, 'decimal': 0, 'string': 0, 'date': 0, 'boolean': 0, 'binary': 0, 'array': 6, 'object': 0, 'null': 1, 'missing': 0}, 'last position seen': {'int': 0, 'decimal': 0, 'string': 4, 'date': 0, 'boolean': 0, 'binary': 0, 'array': 0, 'object': 0, 'null': 3, 'missing': 0}, 'date arrival': {'int': 0, 'decimal': 0, 'string': 6, 'date': 0, 'boolean': 0, 'binary': 0, 'array': 0, 'object': 0, 'null': 1, 'missing': 0}, 'last date seen': {'int': 0, 'decimal': 0, 'string': 6, 'date': 0, 'boolean': 0, 'binary': 0, 'array': 0, 'object': 0, 'null': 1, 'missing': 0}, 'attributes': {'int': 0, 'decimal': 0, 'string': 0, 'date': 0, 'boolean': 0, 'binary': 0, 'array': 6, 'object': 0, 'null': 1, 'missing': 0}, 'Date Type': {'int': 0, 'decimal': 0, 'string': 0, 'date': 6, 'boolean': 0, 'binary': 0, 'array': 0, 'object': 0, 'null': 1, 'missing': 0}, 'timestamp': {'int': 0, 'decimal': 0, 'string': 0, 'date': 6, 'boolean': 0, 'binary': 0, 'array': 0, 'object': 0, 'null': 1, 'missing': 0}, 'Cybertronian': {'int': 0, 'decimal': 0, 'string': 0, 'date': 0, 'boolean': 6, 'binary': 0, 'array': 0, 'object': 0, 'null': 1, 'missing': 0}, 'function(binary)': {'int': 0, 'decimal': 0, 'string': 0, 'date': 0, 'boolean': 0, 'binary': 6, 'array': 0, 'object': 0, 'null': 1, 'missing': 0}, 'NullType': {'int': 0, 'decimal': 0, 'string': 0, 'date': 0, 'boolean': 0, 'binary': 0, 'array': 0, 'object': 0, 'null': 7, 'missing': 0}}
self.assertDictEqual(deep_sort(expected_value), deep_sort(actual_df))
def test_cols_count_by_dtypes_infer(self):
actual_df =source_df.cols.count_by_dtypes('*',infer=True)
expected_value ={'names': {'string': 6, 'null': 1, 'int': 0, 'decimal': 0, 'boolean': 0, 'date': 0, 'array': 0, 'object': 0, 'gender': 0, 'ip': 0, 'url': 0, 'email': 0, 'credit_card_number': 0, 'zip_code': 0, 'missing': 0}, 'height(ft)': {'smallint': 5, 'null': 2}, 'function': {'string': 6, 'null': 1, 'int': 0, 'decimal': 0, 'boolean': 0, 'date': 0, 'array': 0, 'object': 0, 'gender': 0, 'ip': 0, 'url': 0, 'email': 0, 'credit_card_number': 0, 'zip_code': 0, 'missing': 0}, 'rank': {'tinyint': 6, 'null': 1}, 'age': {'int': 6, 'null': 1}, 'weight(t)': {'float': 5, 'null': 2}, 'japanese name': {'array': 6, 'null': 1}, 'last position seen': {'array': 3, 'date': 1, 'null': 3, 'int': 0, 'decimal': 0, 'string': 0, 'boolean': 0, 'object': 0, 'gender': 0, 'ip': 0, 'url': 0, 'email': 0, 'credit_card_number': 0, 'zip_code': 0, 'missing': 0}, 'date arrival': {'date': 6, 'null': 1, 'int': 0, 'decimal': 0, 'string': 0, 'boolean': 0, 'array': 0, 'object': 0, 'gender': 0, 'ip': 0, 'url': 0, 'email': 0, 'credit_card_number': 0, 'zip_code': 0, 'missing': 0}, 'last date seen': {'date': 6, 'null': 1, 'int': 0, 'decimal': 0, 'string': 0, 'boolean': 0, 'array': 0, 'object': 0, 'gender': 0, 'ip': 0, 'url': 0, 'email': 0, 'credit_card_number': 0, 'zip_code': 0, 'missing': 0}, 'attributes': {'array': 6, 'null': 1}, 'Date Type': {'date': 6, 'null': 1}, 'timestamp': {'timestamp': 6, 'null': 1}, 'Cybertronian': {'boolean': 6, 'null': 1}, 'function(binary)': {'binary': 6, 'null': 1}, 'NullType': {'null': 7}}
self.assertDictEqual(deep_sort(expected_value), deep_sort(actual_df))
def test_cols_count_mismatch(self):
source_df=op.create.df([('names', StringType(), True),('height(ft)', IntegerType(), True),('function', StringType(), True),('rank', IntegerType(), True),('age', IntegerType(), True),('weight(t)', FloatType(), True),('japanese name', ArrayType(StringType(),True), True),('last position seen', StringType(), True),('date arrival', StringType(), True),('last date seen', StringType(), True),('attributes', ArrayType(FloatType(),True), True),('DateType', DateType(), True),('Timestamp', TimestampType(), True),('Cybertronian', BooleanType(), True),('function(binary)', BinaryType(), False),('NullType', NullType(), True)], [('31/12/2019', 28, '1978-12-20', 10, 5000000, 4.300000190734863, ['Inochi', 'Convoy'], '19.442735,-99.201111', '1980/04/10', '2016/09/10', [8.53439998626709, 4300.0], datetime.date(2016, 9, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Leader'), None), ('bumbl#ebéé ', 17, 'Espionage', 7, 5000000, 2.0, ['Bumble', 'Goldback'], '10.642707,-71.612534', '1980/04/10', '2015/08/10', [5.334000110626221, 2000.0], datetime.date(2015, 8, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Espionage'), None), ('ironhide&', 26, 'Security', 7, 5000000, 4.0, ['Roadbuster'], '37.789563,-122.400356', '1980/04/10', '2014/07/10', [7.924799919128418, 4000.0], datetime.date(2014, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Security'), None), ('Jazz', 13, 'First Lieutenant', 8, 5000000, 1.7999999523162842, ['Meister'], '33.670666,-117.841553', '1980/04/10', '2013/06/10', [3.962399959564209, 1800.0], datetime.date(2013, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'First Lieutenant'), None), ('Megatron', None, 'None', 10, 5000000, 5.699999809265137, ['Megatron'], None, '1980/04/10', '2012/05/10', [None, 5700.0], datetime.date(2012, 5, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'None'), None), ('Metroplex_)^$', 300, 'Battle Station', 8, 5000000, None, ['Metroflex'], None, '1980/04/10', '2011/04/10', [91.44000244140625, None], datetime.date(2011, 4, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Battle Station'), None), ('1', 2, '3', 4, 5, 6.0, ['7'], '8', '1980/04/10', '2011/04/10', [11.0], datetime.date(2011, 4, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'15'), None)])
actual_df =source_df.cols.count_mismatch({"names": "int"})
expected_value ={'names': {'mismatch': 6, 'int': 1, 'null': 0, 'missing': 0}}
self.assertDictEqual(deep_sort(expected_value), deep_sort(actual_df))
@staticmethod
def test_cols_count_na():
actual_df =source_df.cols.count_na('height(ft)')
actual_df =json_enconding(actual_df)
expected_value =json_enconding(2)
assert (expected_value == actual_df)
@staticmethod
def test_cols_count_na_all_columns():
actual_df =source_df.cols.count_na('*')
actual_df =json_enconding(actual_df)
expected_value =json_enconding({'names': 1, 'height(ft)': 2, 'function': 1, 'rank': 1, 'age': 1, 'weight(t)': 2, 'japanese name': 1, 'last position seen': 3, 'date arrival': 1, 'last date seen': 1, 'attributes': 1, 'Date Type': 1, 'timestamp': 1, 'Cybertronian': 1, 'function(binary)': 1, 'NullType': 7})
assert (expected_value == actual_df)
@staticmethod
def test_cols_count_uniques():
actual_df =source_df.cols.count_uniques('height(ft)')
actual_df =json_enconding(actual_df)
expected_value =json_enconding(5)
assert (expected_value == actual_df)
@staticmethod
def test_cols_count_uniques_all_columns():
actual_df =source_df.cols.count_uniques('*')
actual_df =json_enconding(actual_df)
expected_value =json_enconding({'names': 5, 'height(ft)': 5, 'function': 6, 'rank': 3, 'age': 1, 'weight(t)': 5, 'japanese name': 6, 'last position seen': 4, 'date arrival': 1, 'last date seen': 6, 'attributes': 6, 'Date Type': 6, 'timestamp': 1, 'Cybertronian': 1, 'function(binary)': 6, 'NullType': 0})
assert (expected_value == actual_df)
@staticmethod
def test_cols_count_zeros():
actual_df =source_df.cols.count_zeros('height(ft)')
actual_df =json_enconding(actual_df)
expected_value =json_enconding(0)
assert(expected_value == actual_df)
@staticmethod
def test_cols_count_zeros_all_columns():
actual_df =source_df.cols.count_zeros('*')
actual_df =json_enconding(actual_df)
expected_value =json_enconding({'names': 0, 'height(ft)': 0, 'function': 0, 'rank': 0, 'age': 0, 'weight(t)': 0, 'last position seen': 0, 'date arrival': 0, 'last date seen': 0})
assert (expected_value == actual_df)
@staticmethod
def test_cols_date_format():
actual_df =source_df.cols.date_format('date arrival','yyyy/MM/dd','dd-MM-YYYY')
expected_df = op.create.df([('names', StringType(), True),('height(ft)', ShortType(), True),('function', StringType(), True),('rank', ByteType(), True),('age', IntegerType(), True),('weight(t)', FloatType(), True),('japanese name', ArrayType(StringType(),True), True),('last position seen', StringType(), True),('date arrival', StringType(), True),('last date seen', StringType(), True),('attributes', ArrayType(FloatType(),True), True),('Date Type', DateType(), True),('timestamp', TimestampType(), True),('Cybertronian', BooleanType(), True),('function(binary)', BinaryType(), True),('NullType', NullType(), True)], [("Optim'us", -28, 'Leader', 10, 5000000, 4.300000190734863, ['Inochi', 'Convoy'], '19.442735,-99.201111', '10-04-1980', '2016/09/10', [8.53439998626709, 4300.0], datetime.date(2016, 9, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Leader'), None), ('bumbl#ebéé ', 17, 'Espionage', 7, 5000000, 2.0, ['Bumble', 'Goldback'], '10.642707,-71.612534', '10-04-1980', '2015/08/10', [5.334000110626221, 2000.0], datetime.date(2015, 8, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Espionage'), None), ('ironhide&', 26, 'Security', 7, 5000000, 4.0, ['Roadbuster'], '37.789563,-122.400356', '10-04-1980', '2014/07/10', [7.924799919128418, 4000.0], datetime.date(2014, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Security'), None), ('Jazz', 13, 'First Lieutenant', 8, 5000000, 1.7999999523162842, ['Meister'], '33.670666,-117.841553', '10-04-1980', '2013/06/10', [3.962399959564209, 1800.0], datetime.date(2013, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'First Lieutenant'), None), ('Megatron', None, 'None', 10, 5000000, 5.699999809265137, ['Megatron'], None, '10-04-1980', '2012/05/10', [None, 5700.0], datetime.date(2012, 5, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'None'), None), ('Metroplex_)^$', 300, 'Battle Station', 8, 5000000, None, ['Metroflex'], None, '10-04-1980', '2011/04/10', [91.44000244140625, None], datetime.date(2011, 4, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Battle Station'), None), (None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None)])
assert (expected_df.collect() == actual_df.collect())
@staticmethod
def test_cols_date_format_all_columns():
actual_df =source_df.cols.date_format(['date arrival','last date seen'],'yyyy/MM/dd','dd-MM-YYYY')
expected_df = op.create.df([('names', StringType(), True),('height(ft)', ShortType(), True),('function', StringType(), True),('rank', ByteType(), True),('age', IntegerType(), True),('weight(t)', FloatType(), True),('japanese name', ArrayType(StringType(),True), True),('last position seen', StringType(), True),('date arrival', StringType(), True),('last date seen', StringType(), True),('attributes', ArrayType(FloatType(),True), True),('Date Type', DateType(), True),('timestamp', TimestampType(), True),('Cybertronian', BooleanType(), True),('function(binary)', BinaryType(), True),('NullType', NullType(), True)], [("Optim'us", -28, 'Leader', 10, 5000000, 4.300000190734863, ['Inochi', 'Convoy'], '19.442735,-99.201111', '10-04-1980', '10-09-2016', [8.53439998626709, 4300.0], datetime.date(2016, 9, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Leader'), None), ('bumbl#ebéé ', 17, 'Espionage', 7, 5000000, 2.0, ['Bumble', 'Goldback'], '10.642707,-71.612534', '10-04-1980', '10-08-2015', [5.334000110626221, 2000.0], datetime.date(2015, 8, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Espionage'), None), ('ironhide&', 26, 'Security', 7, 5000000, 4.0, ['Roadbuster'], '37.789563,-122.400356', '10-04-1980', '10-07-2014', [7.924799919128418, 4000.0], datetime.date(2014, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Security'), None), ('Jazz', 13, 'First Lieutenant', 8, 5000000, 1.7999999523162842, ['Meister'], '33.670666,-117.841553', '10-04-1980', '10-06-2013', [3.962399959564209, 1800.0], datetime.date(2013, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'First Lieutenant'), None), ('Megatron', None, 'None', 10, 5000000, 5.699999809265137, ['Megatron'], None, '10-04-1980', '10-05-2012', [None, 5700.0], datetime.date(2012, 5, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'None'), None), ('Metroplex_)^$', 300, 'Battle Station', 8, 5000000, None, ['Metroflex'], None, '10-04-1980', '10-04-2011', [91.44000244140625, None], datetime.date(2011, 4, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Battle Station'), None), (None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None)])
assert (expected_df.collect() == actual_df.collect())
@staticmethod
def test_cols_div():
actual_df =source_df.cols.div(['height(ft)','rank'])
expected_df = op.create.df([('names', StringType(), True),('height(ft)', FloatType(), True),('function', StringType(), True),('rank', FloatType(), True),('age', IntegerType(), True),('weight(t)', FloatType(), True),('japanese name', ArrayType(StringType(),True), True),('last position seen', StringType(), True),('date arrival', StringType(), True),('last date seen', StringType(), True),('attributes', ArrayType(FloatType(),True), True),('Date Type', DateType(), True),('timestamp', TimestampType(), True),('Cybertronian', BooleanType(), True),('function(binary)', BinaryType(), True),('NullType', NullType(), True),('div', DoubleType(), True)], [("Optim'us", -28.0, 'Leader', 10.0, 5000000, 4.300000190734863, ['Inochi', 'Convoy'], '19.442735,-99.201111', '1980/04/10', '2016/09/10', [8.53439998626709, 4300.0], datetime.date(2016, 9, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Leader'), None, -2.8), ('bumbl#ebéé ', 17.0, 'Espionage', 7.0, 5000000, 2.0, ['Bumble', 'Goldback'], '10.642707,-71.612534', '1980/04/10', '2015/08/10', [5.334000110626221, 2000.0], datetime.date(2015, 8, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Espionage'), None, 2.4285714285714284), ('ironhide&', 26.0, 'Security', 7.0, 5000000, 4.0, ['Roadbuster'], '37.789563,-122.400356', '1980/04/10', '2014/07/10', [7.924799919128418, 4000.0], datetime.date(2014, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Security'), None, 3.7142857142857144), ('Jazz', 13.0, 'First Lieutenant', 8.0, 5000000, 1.7999999523162842, ['Meister'], '33.670666,-117.841553', '1980/04/10', '2013/06/10', [3.962399959564209, 1800.0], datetime.date(2013, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'First Lieutenant'), None, 1.625), ('Megatron', None, 'None', 10.0, 5000000, 5.699999809265137, ['Megatron'], None, '1980/04/10', '2012/05/10', [None, 5700.0], datetime.date(2012, 5, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'None'), None, None), ('Metroplex_)^$', 300.0, 'Battle Station', 8.0, 5000000, None, ['Metroflex'], None, '1980/04/10', '2011/04/10', [91.44000244140625, None], datetime.date(2011, 4, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Battle Station'), None, 37.5), (None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None)])
assert (expected_df.collect() == actual_df.collect())
@staticmethod
def test_cols_div_all_columns():
actual_df =source_df.cols.div('*')
expected_df = op.create.df([('names', StringType(), True),('height(ft)', FloatType(), True),('function', StringType(), True),('rank', FloatType(), True),('age', FloatType(), True),('weight(t)', FloatType(), True),('japanese name', ArrayType(StringType(),True), True),('last position seen', StringType(), True),('date arrival', StringType(), True),('last date seen', StringType(), True),('attributes', ArrayType(FloatType(),True), True),('Date Type', DateType(), True),('timestamp', TimestampType(), True),('Cybertronian', BooleanType(), True),('function(binary)', BinaryType(), True),('NullType', NullType(), True),('div', DoubleType(), True)], [("Optim'us", -28.0, 'Leader', 10.0, 5000000.0, 4.300000190734863, ['Inochi', 'Convoy'], '19.442735,-99.201111', '1980/04/10', '2016/09/10', [8.53439998626709, 4300.0], datetime.date(2016, 9, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Leader'), None, -1.302325523628167e-07), ('bumbl#ebéé ', 17.0, 'Espionage', 7.0, 5000000.0, 2.0, ['Bumble', 'Goldback'], '10.642707,-71.612534', '1980/04/10', '2015/08/10', [5.334000110626221, 2000.0], datetime.date(2015, 8, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Espionage'), None, 2.428571428571428e-07), ('ironhide&', 26.0, 'Security', 7.0, 5000000.0, 4.0, ['Roadbuster'], '37.789563,-122.400356', '1980/04/10', '2014/07/10', [7.924799919128418, 4000.0], datetime.date(2014, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Security'), None, 1.8571428571428572e-07), ('Jazz', 13.0, 'First Lieutenant', 8.0, 5000000.0, 1.7999999523162842, ['Meister'], '33.670666,-117.841553', '1980/04/10', '2013/06/10', [3.962399959564209, 1800.0], datetime.date(2013, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'First Lieutenant'), None, 1.8055556033864447e-07), ('Megatron', None, 'None', 10.0, 5000000.0, 5.699999809265137, ['Megatron'], None, '1980/04/10', '2012/05/10', [None, 5700.0], datetime.date(2012, 5, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'None'), None, None), ('Metroplex_)^$', 300.0, 'Battle Station', 8.0, 5000000.0, None, ['Metroflex'], None, '1980/04/10', '2011/04/10', [91.44000244140625, None], datetime.date(2011, 4, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Battle Station'), None, None), (None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None)])
assert (expected_df.collect() == actual_df.collect())
@staticmethod
def test_cols_drop():
actual_df =source_df.cols.drop('rank')
expected_df = op.create.df([('names', StringType(), True),('height(ft)', ShortType(), True),('function', StringType(), True),('age', IntegerType(), True),('weight(t)', FloatType(), True),('japanese name', ArrayType(StringType(),True), True),('last position seen', StringType(), True),('date arrival', StringType(), True),('last date seen', StringType(), True),('attributes', ArrayType(FloatType(),True), True),('Date Type', DateType(), True),('timestamp', TimestampType(), True),('Cybertronian', BooleanType(), True),('function(binary)', BinaryType(), True),('NullType', NullType(), True)], [("Optim'us", -28, 'Leader', 5000000, 4.300000190734863, ['Inochi', 'Convoy'], '19.442735,-99.201111', '1980/04/10', '2016/09/10', [8.53439998626709, 4300.0], datetime.date(2016, 9, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Leader'), None), ('bumbl#ebéé ', 17, 'Espionage', 5000000, 2.0, ['Bumble', 'Goldback'], '10.642707,-71.612534', '1980/04/10', '2015/08/10', [5.334000110626221, 2000.0], datetime.date(2015, 8, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Espionage'), None), ('ironhide&', 26, 'Security', 5000000, 4.0, ['Roadbuster'], '37.789563,-122.400356', '1980/04/10', '2014/07/10', [7.924799919128418, 4000.0], datetime.date(2014, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Security'), None), ('Jazz', 13, 'First Lieutenant', 5000000, 1.7999999523162842, ['Meister'], '33.670666,-117.841553', '1980/04/10', '2013/06/10', [3.962399959564209, 1800.0], datetime.date(2013, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'First Lieutenant'), None), ('Megatron', None, 'None', 5000000, 5.699999809265137, ['Megatron'], None, '1980/04/10', '2012/05/10', [None, 5700.0], datetime.date(2012, 5, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'None'), None), ('Metroplex_)^$', 300, 'Battle Station', 5000000, None, ['Metroflex'], None, '1980/04/10', '2011/04/10', [91.44000244140625, None], datetime.date(2011, 4, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Battle Station'), None), (None, None, None, None, None, None, None, None, None, None, None, None, None, None, None)])
assert (expected_df.collect() == actual_df.collect())
@staticmethod
def test_cols_dtypes():
actual_df =source_df.cols.dtypes('rank')
actual_df =json_enconding(actual_df)
expected_value =json_enconding({'rank': 'tinyint'})
assert (expected_value == actual_df)
@staticmethod
def test_cols_dtypes_all_columns():
actual_df =source_df.cols.dtypes('*')
actual_df =json_enconding(actual_df)
expected_value =json_enconding({'names': 'string', 'height(ft)': 'smallint', 'function': 'string', 'rank': 'tinyint', 'age': 'int', 'weight(t)': 'float', 'japanese name': 'array<string>', 'last position seen': 'string', 'date arrival': 'string', 'last date seen': 'string', 'attributes': 'array<float>', 'Date Type': 'date', 'timestamp': 'timestamp', 'Cybertronian': 'boolean', 'function(binary)': 'binary', 'NullType': 'null'})
assert (expected_value == actual_df)
@staticmethod
def test_cols_fill_na():
actual_df =source_df.cols.fill_na('height(ft)','1')
expected_df = op.create.df([('names', StringType(), True),('height(ft)', DoubleType(), True),('function', StringType(), True),('rank', ByteType(), True),('age', IntegerType(), True),('weight(t)', FloatType(), True),('japanese name', ArrayType(StringType(),True), True),('last position seen', StringType(), True),('date arrival', StringType(), True),('last date seen', StringType(), True),('attributes', ArrayType(FloatType(),True), True),('Date Type', DateType(), True),('timestamp', TimestampType(), True),('Cybertronian', BooleanType(), True),('function(binary)', BinaryType(), True),('NullType', NullType(), True)], [("Optim'us", -28.0, 'Leader', 10, 5000000, 4.300000190734863, ['Inochi', 'Convoy'], '19.442735,-99.201111', '1980/04/10', '2016/09/10', [8.53439998626709, 4300.0], datetime.date(2016, 9, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Leader'), None), ('bumbl#ebéé ', 17.0, 'Espionage', 7, 5000000, 2.0, ['Bumble', 'Goldback'], '10.642707,-71.612534', '1980/04/10', '2015/08/10', [5.334000110626221, 2000.0], datetime.date(2015, 8, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Espionage'), None), ('ironhide&', 26.0, 'Security', 7, 5000000, 4.0, ['Roadbuster'], '37.789563,-122.400356', '1980/04/10', '2014/07/10', [7.924799919128418, 4000.0], datetime.date(2014, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Security'), None), ('Jazz', 13.0, 'First Lieutenant', 8, 5000000, 1.7999999523162842, ['Meister'], '33.670666,-117.841553', '1980/04/10', '2013/06/10', [3.962399959564209, 1800.0], datetime.date(2013, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'First Lieutenant'), None), ('Megatron', 1.0, 'None', 10, 5000000, 5.699999809265137, ['Megatron'], None, '1980/04/10', '2012/05/10', [None, 5700.0], datetime.date(2012, 5, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'None'), None), ('Metroplex_)^$', 300.0, 'Battle Station', 8, 5000000, None, ['Metroflex'], None, '1980/04/10', '2011/04/10', [91.44000244140625, None], datetime.date(2011, 4, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Battle Station'), None), (None, 1.0, None, None, None, None, None, None, None, None, None, None, None, None, None, None)])
assert (expected_df.collect() == actual_df.collect())
@staticmethod
def test_cols_fill_na_all_columns():
actual_df =source_df.cols.fill_na(['names','height(ft)','function','rank','age'],'2')
expected_df = op.create.df([('names', StringType(), True),('height(ft)', DoubleType(), True),('function', StringType(), True),('rank', DoubleType(), True),('age', DoubleType(), True),('weight(t)', FloatType(), True),('japanese name', ArrayType(StringType(),True), True),('last position seen', StringType(), True),('date arrival', StringType(), True),('last date seen', StringType(), True),('attributes', ArrayType(FloatType(),True), True),('Date Type', DateType(), True),('timestamp', TimestampType(), True),('Cybertronian', BooleanType(), True),('function(binary)', BinaryType(), True),('NullType', NullType(), True)], [("Optim'us", -28.0, 'Leader', 10.0, 5000000.0, 4.300000190734863, ['Inochi', 'Convoy'], '19.442735,-99.201111', '1980/04/10', '2016/09/10', [8.53439998626709, 4300.0], datetime.date(2016, 9, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Leader'), None), ('bumbl#ebéé ', 17.0, 'Espionage', 7.0, 5000000.0, 2.0, ['Bumble', 'Goldback'], '10.642707,-71.612534', '1980/04/10', '2015/08/10', [5.334000110626221, 2000.0], datetime.date(2015, 8, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Espionage'), None), ('ironhide&', 26.0, 'Security', 7.0, 5000000.0, 4.0, ['Roadbuster'], '37.789563,-122.400356', '1980/04/10', '2014/07/10', [7.924799919128418, 4000.0], datetime.date(2014, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Security'), None), ('Jazz', 13.0, 'First Lieutenant', 8.0, 5000000.0, 1.7999999523162842, ['Meister'], '33.670666,-117.841553', '1980/04/10', '2013/06/10', [3.962399959564209, 1800.0], datetime.date(2013, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'First Lieutenant'), None), ('Megatron', 2.0, 'None', 10.0, 5000000.0, 5.699999809265137, ['Megatron'], None, '1980/04/10', '2012/05/10', [None, 5700.0], datetime.date(2012, 5, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'None'), None), ('Metroplex_)^$', 300.0, 'Battle Station', 8.0, 5000000.0, None, ['Metroflex'], None, '1980/04/10', '2011/04/10', [91.44000244140625, None], datetime.date(2011, 4, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Battle Station'), None), ('2', 2.0, '2', 2.0, 2.0, None, None, None, None, None, None, None, None, None, None, None)])
assert (expected_df.collect() == actual_df.collect())
@staticmethod
def test_cols_fill_na_array():
actual_df =source_df.cols.fill_na('japanese name',['1','2'])
expected_df = op.create.df([('names', StringType(), True),('height(ft)', ShortType(), True),('function', StringType(), True),('rank', ByteType(), True),('age', IntegerType(), True),('weight(t)', FloatType(), True),('japanese name', ArrayType(StringType(),False), True),('last position seen', StringType(), True),('date arrival', StringType(), True),('last date seen', StringType(), True),('attributes', ArrayType(FloatType(),True), True),('Date Type', DateType(), True),('timestamp', TimestampType(), True),('Cybertronian', BooleanType(), True),('function(binary)', BinaryType(), True),('NullType', NullType(), True)], [("Optim'us", -28, 'Leader', 10, 5000000, 4.300000190734863, ['Inochi', 'Convoy'], '19.442735,-99.201111', '1980/04/10', '2016/09/10', [8.53439998626709, 4300.0], datetime.date(2016, 9, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Leader'), None), ('bumbl#ebéé ', 17, 'Espionage', 7, 5000000, 2.0, ['Bumble', 'Goldback'], '10.642707,-71.612534', '1980/04/10', '2015/08/10', [5.334000110626221, 2000.0], datetime.date(2015, 8, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Espionage'), None), ('ironhide&', 26, 'Security', 7, 5000000, 4.0, ['Roadbuster'], '37.789563,-122.400356', '1980/04/10', '2014/07/10', [7.924799919128418, 4000.0], datetime.date(2014, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Security'), None), ('Jazz', 13, 'First Lieutenant', 8, 5000000, 1.7999999523162842, ['Meister'], '33.670666,-117.841553', '1980/04/10', '2013/06/10', [3.962399959564209, 1800.0], datetime.date(2013, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'First Lieutenant'), None), ('Megatron', None, 'None', 10, 5000000, 5.699999809265137, ['Megatron'], None, '1980/04/10', '2012/05/10', [None, 5700.0], datetime.date(2012, 5, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'None'), None), ('Metroplex_)^$', 300, 'Battle Station', 8, 5000000, None, ['Metroflex'], None, '1980/04/10', '2011/04/10', [91.44000244140625, None], datetime.date(2011, 4, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Battle Station'), None), (None, None, None, None, None, None, ['1', '2'], None, None, None, None, None, None, None, None, None)])
assert (expected_df.collect() == actual_df.collect())
@staticmethod
def test_cols_fill_na_bool():
actual_df =source_df.cols.fill_na('Cybertronian',False)
expected_df = op.create.df([('names', StringType(), True),('height(ft)', ShortType(), True),('function', StringType(), True),('rank', ByteType(), True),('age', IntegerType(), True),('weight(t)', FloatType(), True),('japanese name', ArrayType(StringType(),True), True),('last position seen', StringType(), True),('date arrival', StringType(), True),('last date seen', StringType(), True),('attributes', ArrayType(FloatType(),True), True),('Date Type', DateType(), True),('timestamp', TimestampType(), True),('Cybertronian', BooleanType(), True),('function(binary)', BinaryType(), True),('NullType', NullType(), True)], [("Optim'us", -28, 'Leader', 10, 5000000, 4.300000190734863, ['Inochi', 'Convoy'], '19.442735,-99.201111', '1980/04/10', '2016/09/10', [8.53439998626709, 4300.0], datetime.date(2016, 9, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Leader'), None), ('bumbl#ebéé ', 17, 'Espionage', 7, 5000000, 2.0, ['Bumble', 'Goldback'], '10.642707,-71.612534', '1980/04/10', '2015/08/10', [5.334000110626221, 2000.0], datetime.date(2015, 8, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Espionage'), None), ('ironhide&', 26, 'Security', 7, 5000000, 4.0, ['Roadbuster'], '37.789563,-122.400356', '1980/04/10', '2014/07/10', [7.924799919128418, 4000.0], datetime.date(2014, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Security'), None), ('Jazz', 13, 'First Lieutenant', 8, 5000000, 1.7999999523162842, ['Meister'], '33.670666,-117.841553', '1980/04/10', '2013/06/10', [3.962399959564209, 1800.0], datetime.date(2013, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'First Lieutenant'), None), ('Megatron', None, 'None', 10, 5000000, 5.699999809265137, ['Megatron'], None, '1980/04/10', '2012/05/10', [None, 5700.0], datetime.date(2012, 5, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'None'), None), ('Metroplex_)^$', 300, 'Battle Station', 8, 5000000, None, ['Metroflex'], None, '1980/04/10', '2011/04/10', [91.44000244140625, None], datetime.date(2011, 4, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Battle Station'), None), (None, None, None, None, None, None, None, None, None, None, None, None, None, False, None, None)])
assert (expected_df.collect() == actual_df.collect())
@staticmethod
def test_cols_hist():
actual_df =source_df.cols.hist(['height(ft)','rank'],4)
actual_df =json_enconding(actual_df)
expected_value =json_enconding({'height(ft)': {'hist': [{'count': 4.0, 'lower': -28.0, 'upper': 54.0}, {'count': 0.0, 'lower': 54.0, 'upper': 136.0}, {'count': 0.0, 'lower': 136.0, 'upper': 218.0}, {'count': 0.0, 'lower': 218.0, 'upper': 300.0}]}, 'rank': {'hist': [{'count': 2.0, 'lower': 7.0, 'upper': 7.75}, {'count': 2.0, 'lower': 7.75, 'upper': 8.5}, {'count': 0.0, 'lower': 8.5, 'upper': 9.25}, {'count': 0.0, 'lower': 9.25, 'upper': 10.0}]}})
assert (expected_value == actual_df)
@staticmethod
def test_cols_hist_all_columns():
actual_df =source_df.cols.hist('Date Type',4)
actual_df =json_enconding(actual_df)
expected_value =json_enconding({'Date Type': {'hist': {'hours': [{'count': 6.0, 'lower': 0.0, 'upper': 1.0}, {'count': 0.0, 'lower': 1.0, 'upper': 2.0}, {'count': 0.0, 'lower': 2.0, 'upper': 3.0}, {'count': 0.0, 'lower': 3.0, 'upper': 4.0}, {'count': 0.0, 'lower': 4.0, 'upper': 5.0}, {'count': 0.0, 'lower': 5.0, 'upper': 6.0}, {'count': 0.0, 'lower': 6.0, 'upper': 7.0}, {'count': 0.0, 'lower': 7.0, 'upper': 8.0}, {'count': 0.0, 'lower': 8.0, 'upper': 9.0}, {'count': 0.0, 'lower': 9.0, 'upper': 10.0}, {'count': 0.0, 'lower': 10.0, 'upper': 11.0}, {'count': 0.0, 'lower': 11.0, 'upper': 12.0}, {'count': 0.0, 'lower': 12.0, 'upper': 13.0}, {'count': 0.0, 'lower': 13.0, 'upper': 14.0}, {'count': 0.0, 'lower': 14.0, 'upper': 15.0}, {'count': 0.0, 'lower': 15.0, 'upper': 16.0}, {'count': 0.0, 'lower': 16.0, 'upper': 17.0}, {'count': 0.0, 'lower': 17.0, 'upper': 18.0}, {'count': 0.0, 'lower': 18.0, 'upper': 19.0}, {'count': 0.0, 'lower': 19.0, 'upper': 20.0}, {'count': 0.0, 'lower': 20.0, 'upper': 21.0}, {'count': 0.0, 'lower': 21.0, 'upper': 22.0}, {'count': 0.0, 'lower': 22.0, 'upper': 23.0}], 'seconds': [{'count': 6.0, 'lower': 0.0, 'upper': 1.0}, {'count': 0.0, 'lower': 1.0, 'upper': 2.0}, {'count': 0.0, 'lower': 2.0, 'upper': 3.0}, {'count': 0.0, 'lower': 3.0, 'upper': 4.0}, {'count': 0.0, 'lower': 4.0, 'upper': 5.0}, {'count': 0.0, 'lower': 5.0, 'upper': 6.0}, {'count': 0.0, 'lower': 6.0, 'upper': 7.0}, {'count': 0.0, 'lower': 7.0, 'upper': 8.0}, {'count': 0.0, 'lower': 8.0, 'upper': 9.0}, {'count': 0.0, 'lower': 9.0, 'upper': 10.0}, {'count': 0.0, 'lower': 10.0, 'upper': 11.0}, {'count': 0.0, 'lower': 11.0, 'upper': 12.0}, {'count': 0.0, 'lower': 12.0, 'upper': 13.0}, {'count': 0.0, 'lower': 13.0, 'upper': 14.0}, {'count': 0.0, 'lower': 14.0, 'upper': 15.0}, {'count': 0.0, 'lower': 15.0, 'upper': 16.0}, {'count': 0.0, 'lower': 16.0, 'upper': 17.0}, {'count': 0.0, 'lower': 17.0, 'upper': 18.0}, {'count': 0.0, 'lower': 18.0, 'upper': 19.0}, {'count': 0.0, 'lower': 19.0, 'upper': 20.0}, {'count': 0.0, 'lower': 20.0, 'upper': 21.0}, {'count': 0.0, 'lower': 21.0, 'upper': 22.0}, {'count': 0.0, 'lower': 22.0, 'upper': 23.0}, {'count': 0.0, 'lower': 23.0, 'upper': 24.0}, {'count': 0.0, 'lower': 24.0, 'upper': 25.0}, {'count': 0.0, 'lower': 25.0, 'upper': 26.0}, {'count': 0.0, 'lower': 26.0, 'upper': 27.0}, {'count': 0.0, 'lower': 27.0, 'upper': 28.0}, {'count': 0.0, 'lower': 28.0, 'upper': 29.0}, {'count': 0.0, 'lower': 29.0, 'upper': 30.0}, {'count': 0.0, 'lower': 30.0, 'upper': 31.0}, {'count': 0.0, 'lower': 31.0, 'upper': 32.0}, {'count': 0.0, 'lower': 32.0, 'upper': 33.0}, {'count': 0.0, 'lower': 33.0, 'upper': 34.0}, {'count': 0.0, 'lower': 34.0, 'upper': 35.0}, {'count': 0.0, 'lower': 35.0, 'upper': 36.0}, {'count': 0.0, 'lower': 36.0, 'upper': 37.0}, {'count': 0.0, 'lower': 37.0, 'upper': 38.0}, {'count': 0.0, 'lower': 38.0, 'upper': 39.0}, {'count': 0.0, 'lower': 39.0, 'upper': 40.0}, {'count': 0.0, 'lower': 40.0, 'upper': 41.0}, {'count': 0.0, 'lower': 41.0, 'upper': 42.0}, {'count': 0.0, 'lower': 42.0, 'upper': 43.0}, {'count': 0.0, 'lower': 43.0, 'upper': 44.0}, {'count': 0.0, 'lower': 44.0, 'upper': 45.0}, {'count': 0.0, 'lower': 45.0, 'upper': 46.0}, {'count': 0.0, 'lower': 46.0, 'upper': 47.0}, {'count': 0.0, 'lower': 47.0, 'upper': 48.0}, {'count': 0.0, 'lower': 48.0, 'upper': 49.0}, {'count': 0.0, 'lower': 49.0, 'upper': 50.0}, {'count': 0.0, 'lower': 50.0, 'upper': 51.0}, {'count': 0.0, 'lower': 51.0, 'upper': 52.0}, {'count': 0.0, 'lower': 52.0, 'upper': 53.0}, {'count': 0.0, 'lower': 53.0, 'upper': 54.0}, {'count': 0.0, 'lower': 54.0, 'upper': 55.0}, {'count': 0.0, 'lower': 55.0, 'upper': 56.0}, {'count': 0.0, 'lower': 56.0, 'upper': 57.0}, {'count': 0.0, 'lower': 57.0, 'upper': 58.0}, {'count': 0.0, 'lower': 58.0, 'upper': 59.0}, {'count': 0.0, 'lower': 59.0, 'upper': 60.0}], 'months': [{'count': 0.0, 'lower': 1.0, 'upper': 2.0}, {'count': 0.0, 'lower': 2.0, 'upper': 3.0}, {'count': 0.0, 'lower': 3.0, 'upper': 4.0}, {'count': 1.0, 'lower': 4.0, 'upper': 5.0}, {'count': 1.0, 'lower': 5.0, 'upper': 6.0}, {'count': 2.0, 'lower': 6.0, 'upper': 7.0}, {'count': 0.0, 'lower': 7.0, 'upper': 8.0}, {'count': 1.0, 'lower': 8.0, 'upper': 9.0}, {'count': 1.0, 'lower': 9.0, 'upper': 10.0}, {'count': 0.0, 'lower': 10.0, 'upper': 11.0}, {'count': 0.0, 'lower': 11.0, 'upper': 12.0}], 'years': [{'count': 0.0, 'lower': 1950.0, 'upper': 1951.0}, {'count': 0.0, 'lower': 1951.0, 'upper': 1952.0}, {'count': 0.0, 'lower': 1952.0, 'upper': 1953.0}, {'count': 0.0, 'lower': 1953.0, 'upper': 1954.0}, {'count': 0.0, 'lower': 1954.0, 'upper': 1955.0}, {'count': 0.0, 'lower': 1955.0, 'upper': 1956.0}, {'count': 0.0, 'lower': 1956.0, 'upper': 1957.0}, {'count': 0.0, 'lower': 1957.0, 'upper': 1958.0}, {'count': 0.0, 'lower': 1958.0, 'upper': 1959.0}, {'count': 0.0, 'lower': 1959.0, 'upper': 1960.0}, {'count': 0.0, 'lower': 1960.0, 'upper': 1961.0}, {'count': 0.0, 'lower': 1961.0, 'upper': 1962.0}, {'count': 0.0, 'lower': 1962.0, 'upper': 1963.0}, {'count': 0.0, 'lower': 1963.0, 'upper': 1964.0}, {'count': 0.0, 'lower': 1964.0, 'upper': 1965.0}, {'count': 0.0, 'lower': 1965.0, 'upper': 1966.0}, {'count': 0.0, 'lower': 1966.0, 'upper': 1967.0}, {'count': 0.0, 'lower': 1967.0, 'upper': 1968.0}, {'count': 0.0, 'lower': 1968.0, 'upper': 1969.0}, {'count': 0.0, 'lower': 1969.0, 'upper': 1970.0}, {'count': 0.0, 'lower': 1970.0, 'upper': 1971.0}, {'count': 0.0, 'lower': 1971.0, 'upper': 1972.0}, {'count': 0.0, 'lower': 1972.0, 'upper': 1973.0}, {'count': 0.0, 'lower': 1973.0, 'upper': 1974.0}, {'count': 0.0, 'lower': 1974.0, 'upper': 1975.0}, {'count': 0.0, 'lower': 1975.0, 'upper': 1976.0}, {'count': 0.0, 'lower': 1976.0, 'upper': 1977.0}, {'count': 0.0, 'lower': 1977.0, 'upper': 1978.0}, {'count': 0.0, 'lower': 1978.0, 'upper': 1979.0}, {'count': 0.0, 'lower': 1979.0, 'upper': 1980.0}, {'count': 0.0, 'lower': 1980.0, 'upper': 1981.0}, {'count': 0.0, 'lower': 1981.0, 'upper': 1982.0}, {'count': 0.0, 'lower': 1982.0, 'upper': 1983.0}, {'count': 0.0, 'lower': 1983.0, 'upper': 1984.0}, {'count': 0.0, 'lower': 1984.0, 'upper': 1985.0}, {'count': 0.0, 'lower': 1985.0, 'upper': 1986.0}, {'count': 0.0, 'lower': 1986.0, 'upper': 1987.0}, {'count': 0.0, 'lower': 1987.0, 'upper': 1988.0}, {'count': 0.0, 'lower': 1988.0, 'upper': 1989.0}, {'count': 0.0, 'lower': 1989.0, 'upper': 1990.0}, {'count': 0.0, 'lower': 1990.0, 'upper': 1991.0}, {'count': 0.0, 'lower': 1991.0, 'upper': 1992.0}, {'count': 0.0, 'lower': 1992.0, 'upper': 1993.0}, {'count': 0.0, 'lower': 1993.0, 'upper': 1994.0}, {'count': 0.0, 'lower': 1994.0, 'upper': 1995.0}, {'count': 0.0, 'lower': 1995.0, 'upper': 1996.0}, {'count': 0.0, 'lower': 1996.0, 'upper': 1997.0}, {'count': 0.0, 'lower': 1997.0, 'upper': 1998.0}, {'count': 0.0, 'lower': 1998.0, 'upper': 1999.0}, {'count': 0.0, 'lower': 1999.0, 'upper': 2000.0}, {'count': 0.0, 'lower': 2000.0, 'upper': 2001.0}, {'count': 0.0, 'lower': 2001.0, 'upper': 2002.0}, {'count': 0.0, 'lower': 2002.0, 'upper': 2003.0}, {'count': 0.0, 'lower': 2003.0, 'upper': 2004.0}, {'count': 0.0, 'lower': 2004.0, 'upper': 2005.0}, {'count': 0.0, 'lower': 2005.0, 'upper': 2006.0}, {'count': 0.0, 'lower': 2006.0, 'upper': 2007.0}, {'count': 0.0, 'lower': 2007.0, 'upper': 2008.0}, {'count': 0.0, 'lower': 2008.0, 'upper': 2009.0}, {'count': 0.0, 'lower': 2009.0, 'upper': 2010.0}, {'count': 0.0, 'lower': 2010.0, 'upper': 2011.0}, {'count': 1.0, 'lower': 2011.0, 'upper': 2012.0}, {'count': 1.0, 'lower': 2012.0, 'upper': 2013.0}, {'count': 1.0, 'lower': 2013.0, 'upper': 2014.0}, {'count': 1.0, 'lower': 2014.0, 'upper': 2015.0}, {'count': 1.0, 'lower': 2015.0, 'upper': 2016.0}, {'count': 1.0, 'lower': 2016.0, 'upper': 2017.0}, {'count': 0.0, 'lower': 2017.0, 'upper': 2018.0}, {'count': 0.0, 'lower': 2018.0, 'upper': 2019.0}], 'weekdays': [{'count': 1.0, 'lower': 1.0, 'upper': 1.97}, {'count': 2.0, 'lower': 1.97, 'upper': 2.94}, {'count': 1.0, 'lower': 2.94, 'upper': 3.9}, {'count': 0.0, 'lower': 3.9, 'upper': 4.87}, {'count': 1.0, 'lower': 4.87, 'upper': 5.84}, {'count': 0.0, 'lower': 5.84, 'upper': 6.81}, {'count': 1.0, 'lower': 6.81, 'upper': 7.77}, {'count': 0.0, 'lower': 7.77, 'upper': 8.74}, {'count': 0.0, 'lower': 8.74, 'upper': 9.71}, {'count': 0.0, 'lower': 9.71, 'upper': 10.68}, {'count': 0.0, 'lower': 10.68, 'upper': 11.65}, {'count': 0.0, 'lower': 11.65, 'upper': 12.61}, {'count': 0.0, 'lower': 12.61, 'upper': 13.58}, {'count': 0.0, 'lower': 13.58, 'upper': 14.55}, {'count': 0.0, 'lower': 14.55, 'upper': 15.52}, {'count': 0.0, 'lower': 15.52, 'upper': 16.48}, {'count': 0.0, 'lower': 16.48, 'upper': 17.45}, {'count': 0.0, 'lower': 17.45, 'upper': 18.42}, {'count': 0.0, 'lower': 18.42, 'upper': 19.39}, {'count': 0.0, 'lower': 19.39, 'upper': 20.35}, {'count': 0.0, 'lower': 20.35, 'upper': 21.32}, {'count': 0.0, 'lower': 21.32, 'upper': 22.29}, {'count': 0.0, 'lower': 22.29, 'upper': 23.26}, {'count': 0.0, 'lower': 23.26, 'upper': 24.23}, {'count': 0.0, 'lower': 24.23, 'upper': 25.19}, {'count': 0.0, 'lower': 25.19, 'upper': 26.16}, {'count': 0.0, 'lower': 26.16, 'upper': 27.13}, {'count': 0.0, 'lower': 27.13, 'upper': 28.1}, {'count': 0.0, 'lower': 28.1, 'upper': 29.06}, {'count': 0.0, 'lower': 29.06, 'upper': 30.03}, {'count': 0.0, 'lower': 30.03, 'upper': 31.0}], 'minutes': [{'count': 6.0, 'lower': 0.0, 'upper': 1.0}, {'count': 0.0, 'lower': 1.0, 'upper': 2.0}, {'count': 0.0, 'lower': 2.0, 'upper': 3.0}, {'count': 0.0, 'lower': 3.0, 'upper': 4.0}, {'count': 0.0, 'lower': 4.0, 'upper': 5.0}, {'count': 0.0, 'lower': 5.0, 'upper': 6.0}, {'count': 0.0, 'lower': 6.0, 'upper': 7.0}, {'count': 0.0, 'lower': 7.0, 'upper': 8.0}, {'count': 0.0, 'lower': 8.0, 'upper': 9.0}, {'count': 0.0, 'lower': 9.0, 'upper': 10.0}, {'count': 0.0, 'lower': 10.0, 'upper': 11.0}, {'count': 0.0, 'lower': 11.0, 'upper': 12.0}, {'count': 0.0, 'lower': 12.0, 'upper': 13.0}, {'count': 0.0, 'lower': 13.0, 'upper': 14.0}, {'count': 0.0, 'lower': 14.0, 'upper': 15.0}, {'count': 0.0, 'lower': 15.0, 'upper': 16.0}, {'count': 0.0, 'lower': 16.0, 'upper': 17.0}, {'count': 0.0, 'lower': 17.0, 'upper': 18.0}, {'count': 0.0, 'lower': 18.0, 'upper': 19.0}, {'count': 0.0, 'lower': 19.0, 'upper': 20.0}, {'count': 0.0, 'lower': 20.0, 'upper': 21.0}, {'count': 0.0, 'lower': 21.0, 'upper': 22.0}, {'count': 0.0, 'lower': 22.0, 'upper': 23.0}, {'count': 0.0, 'lower': 23.0, 'upper': 24.0}, {'count': 0.0, 'lower': 24.0, 'upper': 25.0}, {'count': 0.0, 'lower': 25.0, 'upper': 26.0}, {'count': 0.0, 'lower': 26.0, 'upper': 27.0}, {'count': 0.0, 'lower': 27.0, 'upper': 28.0}, {'count': 0.0, 'lower': 28.0, 'upper': 29.0}, {'count': 0.0, 'lower': 29.0, 'upper': 30.0}, {'count': 0.0, 'lower': 30.0, 'upper': 31.0}, {'count': 0.0, 'lower': 31.0, 'upper': 32.0}, {'count': 0.0, 'lower': 32.0, 'upper': 33.0}, {'count': 0.0, 'lower': 33.0, 'upper': 34.0}, {'count': 0.0, 'lower': 34.0, 'upper': 35.0}, {'count': 0.0, 'lower': 35.0, 'upper': 36.0}, {'count': 0.0, 'lower': 36.0, 'upper': 37.0}, {'count': 0.0, 'lower': 37.0, 'upper': 38.0}, {'count': 0.0, 'lower': 38.0, 'upper': 39.0}, {'count': 0.0, 'lower': 39.0, 'upper': 40.0}, {'count': 0.0, 'lower': 40.0, 'upper': 41.0}, {'count': 0.0, 'lower': 41.0, 'upper': 42.0}, {'count': 0.0, 'lower': 42.0, 'upper': 43.0}, {'count': 0.0, 'lower': 43.0, 'upper': 44.0}, {'count': 0.0, 'lower': 44.0, 'upper': 45.0}, {'count': 0.0, 'lower': 45.0, 'upper': 46.0}, {'count': 0.0, 'lower': 46.0, 'upper': 47.0}, {'count': 0.0, 'lower': 47.0, 'upper': 48.0}, {'count': 0.0, 'lower': 48.0, 'upper': 49.0}, {'count': 0.0, 'lower': 49.0, 'upper': 50.0}, {'count': 0.0, 'lower': 50.0, 'upper': 51.0}, {'count': 0.0, 'lower': 51.0, 'upper': 52.0}, {'count': 0.0, 'lower': 52.0, 'upper': 53.0}, {'count': 0.0, 'lower': 53.0, 'upper': 54.0}, {'count': 0.0, 'lower': 54.0, 'upper': 55.0}, {'count': 0.0, 'lower': 55.0, 'upper': 56.0}, {'count': 0.0, 'lower': 56.0, 'upper': 57.0}, {'count': 0.0, 'lower': 57.0, 'upper': 58.0}, {'count': 0.0, 'lower': 58.0, 'upper': 59.0}, {'count': 0.0, 'lower': 59.0, 'upper': 60.0}]}}})
assert (expected_value == actual_df)
@staticmethod
def test_cols_impute():
actual_df =source_df.cols.impute('rank')
expected_df = op.create.df([('names', StringType(), True),('height(ft)', ShortType(), True),('function', StringType(), True),('rank', FloatType(), True),('age', IntegerType(), True),('weight(t)', FloatType(), True),('japanese name', ArrayType(StringType(),True), True),('last position seen', StringType(), True),('date arrival', StringType(), True),('last date seen', StringType(), True),('attributes', ArrayType(FloatType(),True), True),('Date Type', DateType(), True),('timestamp', TimestampType(), True),('Cybertronian', BooleanType(), True),('function(binary)', BinaryType(), True),('NullType', NullType(), True)], [("Optim'us", -28, 'Leader', 10.0, 5000000, 4.300000190734863, ['Inochi', 'Convoy'], '19.442735,-99.201111', '1980/04/10', '2016/09/10', [8.53439998626709, 4300.0], datetime.date(2016, 9, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Leader'), None), ('bumbl#ebéé ', 17, 'Espionage', 7.0, 5000000, 2.0, ['Bumble', 'Goldback'], '10.642707,-71.612534', '1980/04/10', '2015/08/10', [5.334000110626221, 2000.0], datetime.date(2015, 8, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Espionage'), None), ('ironhide&', 26, 'Security', 7.0, 5000000, 4.0, ['Roadbuster'], '37.789563,-122.400356', '1980/04/10', '2014/07/10', [7.924799919128418, 4000.0], datetime.date(2014, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Security'), None), ('Jazz', 13, 'First Lieutenant', 8.0, 5000000, 1.7999999523162842, ['Meister'], '33.670666,-117.841553', '1980/04/10', '2013/06/10', [3.962399959564209, 1800.0], datetime.date(2013, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'First Lieutenant'), None), ('Megatron', None, 'None', 10.0, 5000000, 5.699999809265137, ['Megatron'], None, '1980/04/10', '2012/05/10', [None, 5700.0], datetime.date(2012, 5, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'None'), None), ('Metroplex_)^$', 300, 'Battle Station', 8.0, 5000000, None, ['Metroflex'], None, '1980/04/10', '2011/04/10', [91.44000244140625, None], datetime.date(2011, 4, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Battle Station'), None), (None, None, None, 8.333333015441895, None, None, None, None, None, None, None, None, None, None, None, None)])
assert (expected_df.collect() == actual_df.collect())
@staticmethod
def test_cols_impute_all_columns():
actual_df =source_df.cols.impute('names','categorical')
expected_df = op.create.df([('names', StringType(), True),('height(ft)', ShortType(), True),('function', StringType(), True),('rank', ByteType(), True),('age', IntegerType(), True),('weight(t)', FloatType(), True),('japanese name', ArrayType(StringType(),True), True),('last position seen', StringType(), True),('date arrival', StringType(), True),('last date seen', StringType(), True),('attributes', ArrayType(FloatType(),True), True),('Date Type', DateType(), True),('timestamp', TimestampType(), True),('Cybertronian', BooleanType(), True),('function(binary)', BinaryType(), True),('NullType', NullType(), True)], [("Optim'us", -28, 'Leader', 10, 5000000, 4.300000190734863, ['Inochi', 'Convoy'], '19.442735,-99.201111', '1980/04/10', '2016/09/10', [8.53439998626709, 4300.0], datetime.date(2016, 9, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Leader'), None), ('bumbl#ebéé ', 17, 'Espionage', 7, 5000000, 2.0, ['Bumble', 'Goldback'], '10.642707,-71.612534', '1980/04/10', '2015/08/10', [5.334000110626221, 2000.0], datetime.date(2015, 8, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Espionage'), None), ('ironhide&', 26, 'Security', 7, 5000000, 4.0, ['Roadbuster'], '37.789563,-122.400356', '1980/04/10', '2014/07/10', [7.924799919128418, 4000.0], datetime.date(2014, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Security'), None), ('Jazz', 13, 'First Lieutenant', 8, 5000000, 1.7999999523162842, ['Meister'], '33.670666,-117.841553', '1980/04/10', '2013/06/10', [3.962399959564209, 1800.0], datetime.date(2013, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'First Lieutenant'), None), ('Megatron', None, 'None', 10, 5000000, 5.699999809265137, ['Megatron'], None, '1980/04/10', '2012/05/10', [None, 5700.0], datetime.date(2012, 5, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'None'), None), ('Metroplex_)^$', 300, 'Battle Station', 8, 5000000, None, ['Metroflex'], None, '1980/04/10', '2011/04/10', [91.44000244140625, None], datetime.date(2011, 4, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Battle Station'), None), ('None', None, None, None, None, None, None, None, None, None, None, None, None, None, None, None)])
assert (expected_df.collect() == actual_df.collect())
@staticmethod
def test_cols_iqr():
actual_df =source_df.cols.iqr('height(ft)')
actual_df =json_enconding(actual_df)
expected_value =json_enconding(13)
assert (expected_value == actual_df)
@staticmethod
def test_cols_iqr_all_columns():
actual_df =source_df.cols.iqr('*')
actual_df =json_enconding(actual_df)
expected_value =json_enconding({'height(ft)': 13, 'rank': 3, 'age': 0, 'weight(t)': 2.3000001907348633})
assert (expected_value == actual_df)
@staticmethod
def test_cols_is_na():
actual_df =source_df.cols.is_na('height(ft)')
expected_df = op.create.df([('names', StringType(), True),('height(ft)', BooleanType(), False),('function', StringType(), True),('rank', ByteType(), True),('age', IntegerType(), True),('weight(t)', FloatType(), True),('japanese name', ArrayType(StringType(),True), True),('last position seen', StringType(), True),('date arrival', StringType(), True),('last date seen', StringType(), True),('attributes', ArrayType(FloatType(),True), True),('Date Type', DateType(), True),('timestamp', TimestampType(), True),('Cybertronian', BooleanType(), True),('function(binary)', BinaryType(), True),('NullType', NullType(), True)], [("Optim'us", False, 'Leader', 10, 5000000, 4.300000190734863, ['Inochi', 'Convoy'], '19.442735,-99.201111', '1980/04/10', '2016/09/10', [8.53439998626709, 4300.0], datetime.date(2016, 9, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Leader'), None), ('bumbl#ebéé ', False, 'Espionage', 7, 5000000, 2.0, ['Bumble', 'Goldback'], '10.642707,-71.612534', '1980/04/10', '2015/08/10', [5.334000110626221, 2000.0], datetime.date(2015, 8, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Espionage'), None), ('ironhide&', False, 'Security', 7, 5000000, 4.0, ['Roadbuster'], '37.789563,-122.400356', '1980/04/10', '2014/07/10', [7.924799919128418, 4000.0], datetime.date(2014, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Security'), None), ('Jazz', False, 'First Lieutenant', 8, 5000000, 1.7999999523162842, ['Meister'], '33.670666,-117.841553', '1980/04/10', '2013/06/10', [3.962399959564209, 1800.0], datetime.date(2013, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'First Lieutenant'), None), ('Megatron', True, 'None', 10, 5000000, 5.699999809265137, ['Megatron'], None, '1980/04/10', '2012/05/10', [None, 5700.0], datetime.date(2012, 5, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'None'), None), ('Metroplex_)^$', False, 'Battle Station', 8, 5000000, None, ['Metroflex'], None, '1980/04/10', '2011/04/10', [91.44000244140625, None], datetime.date(2011, 4, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Battle Station'), None), (None, True, None, None, None, None, None, None, None, None, None, None, None, None, None, None)])
assert (expected_df.collect() == actual_df.collect())
@staticmethod
def test_cols_is_na_all_columns():
actual_df =source_df.cols.is_na('*')
expected_df = op.create.df([('names', BooleanType(), False),('height(ft)', BooleanType(), False),('function', BooleanType(), False),('rank', BooleanType(), False),('age', BooleanType(), False),('weight(t)', BooleanType(), False),('japanese name', BooleanType(), False),('last position seen', BooleanType(), False),('date arrival', BooleanType(), False),('last date seen', BooleanType(), False),('attributes', BooleanType(), False),('Date Type', BooleanType(), False),('timestamp', BooleanType(), False),('Cybertronian', BooleanType(), False),('function(binary)', BooleanType(), False),('NullType', BooleanType(), False)], [(False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True), (False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True), (False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True), (False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True), (False, True, False, False, False, False, False, True, False, False, False, False, False, False, False, True), (False, False, False, False, False, True, False, True, False, False, False, False, False, False, False, True), (True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True)])
assert (expected_df.collect() == actual_df.collect())
@staticmethod
def test_cols_keep():
actual_df =source_df.cols.keep('rank')
expected_df = op.create.df([('rank', ByteType(), True)], [(10,), (7,), (7,), (8,), (10,), (8,), (None,)])
assert (expected_df.collect() == actual_df.collect())
@staticmethod
def test_cols_kurt():
actual_df =source_df.cols.kurt('height(ft)')
actual_df =json_enconding(actual_df)
expected_value =json_enconding(0.13863)
assert (expected_value == actual_df)
@staticmethod
def test_cols_kurt_all_columns():
actual_df =source_df.cols.kurt('*')
actual_df =json_enconding(actual_df)
expected_value =json_enconding({'height(ft)': 0.13863, 'rank': -1.5, 'age': nan, 'weight(t)': -1.43641})
assert (expected_value == actual_df)
@staticmethod
def test_cols_lower():
actual_df =source_df.cols.lower('function')
expected_df = op.create.df([('names', StringType(), True),('height(ft)', ShortType(), True),('function', StringType(), True),('rank', ByteType(), True),('age', IntegerType(), True),('weight(t)', FloatType(), True),('japanese name', ArrayType(StringType(),True), True),('last position seen', StringType(), True),('date arrival', StringType(), True),('last date seen', StringType(), True),('attributes', ArrayType(FloatType(),True), True),('Date Type', DateType(), True),('timestamp', TimestampType(), True),('Cybertronian', BooleanType(), True),('function(binary)', BinaryType(), True),('NullType', NullType(), True)], [("Optim'us", -28, 'leader', 10, 5000000, 4.300000190734863, ['Inochi', 'Convoy'], '19.442735,-99.201111', '1980/04/10', '2016/09/10', [8.53439998626709, 4300.0], datetime.date(2016, 9, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Leader'), None), ('bumbl#ebéé ', 17, 'espionage', 7, 5000000, 2.0, ['Bumble', 'Goldback'], '10.642707,-71.612534', '1980/04/10', '2015/08/10', [5.334000110626221, 2000.0], datetime.date(2015, 8, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Espionage'), None), ('ironhide&', 26, 'security', 7, 5000000, 4.0, ['Roadbuster'], '37.789563,-122.400356', '1980/04/10', '2014/07/10', [7.924799919128418, 4000.0], datetime.date(2014, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Security'), None), ('Jazz', 13, 'first lieutenant', 8, 5000000, 1.7999999523162842, ['Meister'], '33.670666,-117.841553', '1980/04/10', '2013/06/10', [3.962399959564209, 1800.0], datetime.date(2013, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'First Lieutenant'), None), ('Megatron', None, 'none', 10, 5000000, 5.699999809265137, ['Megatron'], None, '1980/04/10', '2012/05/10', [None, 5700.0], datetime.date(2012, 5, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'None'), None), ('Metroplex_)^$', 300, 'battle station', 8, 5000000, None, ['Metroflex'], None, '1980/04/10', '2011/04/10', [91.44000244140625, None], datetime.date(2011, 4, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Battle Station'), None), (None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None)])
assert (expected_df.collect() == actual_df.collect())
@staticmethod
def test_cols_lower_all_columns():
actual_df =source_df.cols.lower('*')
expected_df = op.create.df([('names', StringType(), True),('height(ft)', ShortType(), True),('function', StringType(), True),('rank', ByteType(), True),('age', IntegerType(), True),('weight(t)', FloatType(), True),('japanese name', ArrayType(StringType(),True), True),('last position seen', StringType(), True),('date arrival', StringType(), True),('last date seen', StringType(), True),('attributes', ArrayType(FloatType(),True), True),('Date Type', DateType(), True),('timestamp', TimestampType(), True),('Cybertronian', BooleanType(), True),('function(binary)', BinaryType(), True),('NullType', NullType(), True)], [("optim'us", -28, 'leader', 10, 5000000, 4.300000190734863, ['Inochi', 'Convoy'], '19.442735,-99.201111', '1980/04/10', '2016/09/10', [8.53439998626709, 4300.0], datetime.date(2016, 9, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Leader'), None), ('bumbl#ebéé ', 17, 'espionage', 7, 5000000, 2.0, ['Bumble', 'Goldback'], '10.642707,-71.612534', '1980/04/10', '2015/08/10', [5.334000110626221, 2000.0], datetime.date(2015, 8, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Espionage'), None), ('ironhide&', 26, 'security', 7, 5000000, 4.0, ['Roadbuster'], '37.789563,-122.400356', '1980/04/10', '2014/07/10', [7.924799919128418, 4000.0], datetime.date(2014, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Security'), None), ('jazz', 13, 'first lieutenant', 8, 5000000, 1.7999999523162842, ['Meister'], '33.670666,-117.841553', '1980/04/10', '2013/06/10', [3.962399959564209, 1800.0], datetime.date(2013, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'First Lieutenant'), None), ('megatron', None, 'none', 10, 5000000, 5.699999809265137, ['Megatron'], None, '1980/04/10', '2012/05/10', [None, 5700.0], datetime.date(2012, 5, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'None'), None), ('metroplex_)^$', 300, 'battle station', 8, 5000000, None, ['Metroflex'], None, '1980/04/10', '2011/04/10', [91.44000244140625, None], datetime.date(2011, 4, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Battle Station'), None), (None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None)])
assert (expected_df.collect() == actual_df.collect())
@staticmethod
def test_cols_mad():
actual_df =source_df.cols.mad('height(ft)')
actual_df =json_enconding(actual_df)
expected_value =json_enconding(9)
assert (expected_value == actual_df)
@staticmethod
def test_cols_mad_all_columns():
actual_df =source_df.cols.mad('*')
actual_df =json_enconding(actual_df)
expected_value =json_enconding({'height(ft)': 9, 'rank': 1, 'age': 0, 'weight(t)': 1.6999998092651367})
assert (expected_value == actual_df)
@staticmethod
def test_cols_max():
actual_df =source_df.cols.max('height(ft)')
actual_df =json_enconding(actual_df)
expected_value =json_enconding(300)
assert (expected_value == actual_df)
@staticmethod
def test_cols_max_abs_scaler():
actual_df =source_df.cols.max_abs_scaler('height(ft)')
expected_df = op.create.df([('names', StringType(), True),('height(ft)', DoubleType(), True),('function', StringType(), True),('rank', ByteType(), True),('age', IntegerType(), True),('weight(t)', FloatType(), True),('japanese name', ArrayType(StringType(),True), True),('last position seen', StringType(), True),('date arrival', StringType(), True),('last date seen', StringType(), True),('attributes', ArrayType(FloatType(),True), True),('Date Type', DateType(), True),('timestamp', TimestampType(), True),('Cybertronian', BooleanType(), True),('function(binary)', BinaryType(), True),('NullType', NullType(), True)], [("Optim'us", -0.09333333333333334, 'Leader', 10, 5000000, 4.300000190734863, ['Inochi', 'Convoy'], '19.442735,-99.201111', '1980/04/10', '2016/09/10', [8.53439998626709, 4300.0], datetime.date(2016, 9, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Leader'), None), ('bumbl#ebéé ', 0.056666666666666664, 'Espionage', 7, 5000000, 2.0, ['Bumble', 'Goldback'], '10.642707,-71.612534', '1980/04/10', '2015/08/10', [5.334000110626221, 2000.0], datetime.date(2015, 8, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Espionage'), None), ('ironhide&', 0.08666666666666667, 'Security', 7, 5000000, 4.0, ['Roadbuster'], '37.789563,-122.400356', '1980/04/10', '2014/07/10', [7.924799919128418, 4000.0], datetime.date(2014, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Security'), None), ('Jazz', 0.043333333333333335, 'First Lieutenant', 8, 5000000, 1.7999999523162842, ['Meister'], '33.670666,-117.841553', '1980/04/10', '2013/06/10', [3.962399959564209, 1800.0], datetime.date(2013, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'First Lieutenant'), None), ('Megatron', None, 'None', 10, 5000000, 5.699999809265137, ['Megatron'], None, '1980/04/10', '2012/05/10', [None, 5700.0], datetime.date(2012, 5, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'None'), None), ('Metroplex_)^$', 1.0, 'Battle Station', 8, 5000000, None, ['Metroflex'], None, '1980/04/10', '2011/04/10', [91.44000244140625, None], datetime.date(2011, 4, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Battle Station'), None), (None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None)])
assert (expected_df.collect() == actual_df.collect())
@staticmethod
def test_cols_max_all_columns():
actual_df =source_df.cols.max('*')
actual_df =json_enconding(actual_df)
expected_value =json_enconding({'names': 'ironhide&', 'height(ft)': 300, 'function': 'Security', 'rank': 10, 'age': 5000000, 'weight(t)': 5.7, 'japanese name': ['Roadbuster'], 'last position seen': '37.789563,-122.400356', 'date arrival': '1980/04/10', 'last date seen': '2016/09/10', 'attributes': [91.44000244140625, None], 'Date Type': '2016-09-10', 'timestamp': '2014-06-24 00:00:00', 'Cybertronian': 1, 'function(binary)': None, 'NullType': None})
assert (expected_value == actual_df)
@staticmethod
def test_cols_mean():
actual_df =source_df.cols.mean('height(ft)')
actual_df =json_enconding(actual_df)
expected_value =json_enconding(65.6)
assert (expected_value == actual_df)
@staticmethod
def test_cols_mean_all_columns():
actual_df =source_df.cols.mean('*')
actual_df =json_enconding(actual_df)
expected_value =json_enconding({'height(ft)': 65.6, 'rank': 8.33333, 'age': 5000000.0, 'weight(t)': 3.56})
assert (expected_value == actual_df)
@staticmethod
def test_cols_median():
actual_df =source_df.cols.median('height(ft)')
actual_df =json_enconding(actual_df)
expected_value =json_enconding(17)
assert (expected_value == actual_df)
@staticmethod
def test_cols_median_all_columns():
actual_df =source_df.cols.median('*')
actual_df =json_enconding(actual_df)
expected_value =json_enconding({'height(ft)': 17, 'rank': 8, 'age': 5000000, 'weight(t)': 4.0})
assert (expected_value == actual_df)
@staticmethod
def test_cols_min():
actual_df =source_df.cols.min('height(ft)')
actual_df =json_enconding(actual_df)
expected_value =json_enconding(-28)
assert (expected_value == actual_df)
@staticmethod
def test_cols_min_all_columns():
actual_df =source_df.cols.min('*')
actual_df =json_enconding(actual_df)
expected_value =json_enconding({'names': 'Jazz', 'height(ft)': -28, 'function': 'Battle Station', 'rank': 7, 'age': 5000000, 'weight(t)': 1.8, 'japanese name': ['Bumble', 'Goldback'], 'last position seen': '10.642707,-71.612534', 'date arrival': '1980/04/10', 'last date seen': '2011/04/10', 'attributes': [None, 5700.0], 'Date Type': '2011-04-10', 'timestamp': '2014-06-24 00:00:00', 'Cybertronian': 1, 'function(binary)': None, 'NullType': None})
assert (expected_value == actual_df)
@staticmethod
def test_cols_min_max_scaler():
actual_df =source_df.cols.min_max_scaler('height(ft)')
expected_df = op.create.df([('names', StringType(), True),('height(ft)', DoubleType(), True),('function', StringType(), True),('rank', ByteType(), True),('age', IntegerType(), True),('weight(t)', FloatType(), True),('japanese name', ArrayType(StringType(),True), True),('last position seen', StringType(), True),('date arrival', StringType(), True),('last date seen', StringType(), True),('attributes', ArrayType(FloatType(),True), True),('Date Type', DateType(), True),('timestamp', TimestampType(), True),('Cybertronian', BooleanType(), True),('function(binary)', BinaryType(), True),('NullType', NullType(), True)], [("Optim'us", 28.0, 'Leader', 10, 5000000, 4.300000190734863, ['Inochi', 'Convoy'], '19.442735,-99.201111', '1980/04/10', '2016/09/10', [8.53439998626709, 4300.0], datetime.date(2016, 9, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Leader'), None), ('bumbl#ebéé ', 28.15, 'Espionage', 7, 5000000, 2.0, ['Bumble', 'Goldback'], '10.642707,-71.612534', '1980/04/10', '2015/08/10', [5.334000110626221, 2000.0], datetime.date(2015, 8, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Espionage'), None), ('ironhide&', 28.18, 'Security', 7, 5000000, 4.0, ['Roadbuster'], '37.789563,-122.400356', '1980/04/10', '2014/07/10', [7.924799919128418, 4000.0], datetime.date(2014, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Security'), None), ('Jazz', 28.136666666666667, 'First Lieutenant', 8, 5000000, 1.7999999523162842, ['Meister'], '33.670666,-117.841553', '1980/04/10', '2013/06/10', [3.962399959564209, 1800.0], datetime.date(2013, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'First Lieutenant'), None), ('Megatron', None, 'None', 10, 5000000, 5.699999809265137, ['Megatron'], None, '1980/04/10', '2012/05/10', [None, 5700.0], datetime.date(2012, 5, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'None'), None), ('Metroplex_)^$', 29.093333333333334, 'Battle Station', 8, 5000000, None, ['Metroflex'], None, '1980/04/10', '2011/04/10', [91.44000244140625, None], datetime.date(2011, 4, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Battle Station'), None), (None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None)])
assert (expected_df.collect() == actual_df.collect())
@staticmethod
def test_cols_mode():
actual_df =source_df.cols.mode('height(ft)')
actual_df =json_enconding(actual_df)
expected_value =json_enconding(None)
assert(expected_value == actual_df)
@staticmethod
def test_cols_mode_all_columns():
actual_df =source_df.cols.mode('*')
actual_df =json_enconding(actual_df)
expected_value =json_enconding([{'names': None}, {'height(ft)': None}, {'function': None}, {'rank': [8, 7, 10]}, {'age': 5000000}, {'weight(t)': None}, {'japanese name': None}, {'last position seen': None}, {'date arrival': '1980/04/10'}, {'last date seen': None}, {'attributes': None}, {'Date Type': None}, {'timestamp': datetime.datetime(2014, 6, 24, 0, 0)}, {'Cybertronian': True}, {'function(binary)': None}, {'NullType': None}])
assert(expected_value == actual_df)
@staticmethod
def test_cols_move_after():
actual_df =source_df.cols.move('rank','after','attributes')
expected_df = op.create.df([('names', StringType(), True),('height(ft)', ShortType(), True),('function', StringType(), True),('age', IntegerType(), True),('weight(t)', FloatType(), True),('japanese name', ArrayType(StringType(),True), True),('last position seen', StringType(), True),('date arrival', StringType(), True),('last date seen', StringType(), True),('attributes', ArrayType(FloatType(),True), True),('rank', ByteType(), True),('Date Type', DateType(), True),('timestamp', TimestampType(), True),('Cybertronian', BooleanType(), True),('function(binary)', BinaryType(), True),('NullType', NullType(), True)], [("Optim'us", -28, 'Leader', 5000000, 4.300000190734863, ['Inochi', 'Convoy'], '19.442735,-99.201111', '1980/04/10', '2016/09/10', [8.53439998626709, 4300.0], 10, datetime.date(2016, 9, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Leader'), None), ('bumbl#ebéé ', 17, 'Espionage', 5000000, 2.0, ['Bumble', 'Goldback'], '10.642707,-71.612534', '1980/04/10', '2015/08/10', [5.334000110626221, 2000.0], 7, datetime.date(2015, 8, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Espionage'), None), ('ironhide&', 26, 'Security', 5000000, 4.0, ['Roadbuster'], '37.789563,-122.400356', '1980/04/10', '2014/07/10', [7.924799919128418, 4000.0], 7, datetime.date(2014, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Security'), None), ('Jazz', 13, 'First Lieutenant', 5000000, 1.7999999523162842, ['Meister'], '33.670666,-117.841553', '1980/04/10', '2013/06/10', [3.962399959564209, 1800.0], 8, datetime.date(2013, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'First Lieutenant'), None), ('Megatron', None, 'None', 5000000, 5.699999809265137, ['Megatron'], None, '1980/04/10', '2012/05/10', [None, 5700.0], 10, datetime.date(2012, 5, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'None'), None), ('Metroplex_)^$', 300, 'Battle Station', 5000000, None, ['Metroflex'], None, '1980/04/10', '2011/04/10', [91.44000244140625, None], 8, datetime.date(2011, 4, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Battle Station'), None), (None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None)])
assert (expected_df.collect() == actual_df.collect())
@staticmethod
def test_cols_move_before():
actual_df =source_df.cols.move('rank','before','attributes')
expected_df = op.create.df([('names', StringType(), True),('height(ft)', ShortType(), True),('function', StringType(), True),('age', IntegerType(), True),('weight(t)', FloatType(), True),('japanese name', ArrayType(StringType(),True), True),('last position seen', StringType(), True),('date arrival', StringType(), True),('last date seen', StringType(), True),('rank', ByteType(), True),('attributes', ArrayType(FloatType(),True), True),('Date Type', DateType(), True),('timestamp', TimestampType(), True),('Cybertronian', BooleanType(), True),('function(binary)', BinaryType(), True),('NullType', NullType(), True)], [("Optim'us", -28, 'Leader', 5000000, 4.300000190734863, ['Inochi', 'Convoy'], '19.442735,-99.201111', '1980/04/10', '2016/09/10', 10, [8.53439998626709, 4300.0], datetime.date(2016, 9, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Leader'), None), ('bumbl#ebéé ', 17, 'Espionage', 5000000, 2.0, ['Bumble', 'Goldback'], '10.642707,-71.612534', '1980/04/10', '2015/08/10', 7, [5.334000110626221, 2000.0], datetime.date(2015, 8, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Espionage'), None), ('ironhide&', 26, 'Security', 5000000, 4.0, ['Roadbuster'], '37.789563,-122.400356', '1980/04/10', '2014/07/10', 7, [7.924799919128418, 4000.0], datetime.date(2014, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Security'), None), ('Jazz', 13, 'First Lieutenant', 5000000, 1.7999999523162842, ['Meister'], '33.670666,-117.841553', '1980/04/10', '2013/06/10', 8, [3.962399959564209, 1800.0], datetime.date(2013, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'First Lieutenant'), None), ('Megatron', None, 'None', 5000000, 5.699999809265137, ['Megatron'], None, '1980/04/10', '2012/05/10', 10, [None, 5700.0], datetime.date(2012, 5, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'None'), None), ('Metroplex_)^$', 300, 'Battle Station', 5000000, None, ['Metroflex'], None, '1980/04/10', '2011/04/10', 8, [91.44000244140625, None], datetime.date(2011, 4, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Battle Station'), None), (None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None)])
assert (expected_df.collect() == actual_df.collect())
@staticmethod
def test_cols_move_beginning():
actual_df =source_df.cols.move('rank','beginning')
expected_df = op.create.df([('rank', ByteType(), True),('names', StringType(), True),('height(ft)', ShortType(), True),('function', StringType(), True),('age', IntegerType(), True),('weight(t)', FloatType(), True),('japanese name', ArrayType(StringType(),True), True),('last position seen', StringType(), True),('date arrival', StringType(), True),('last date seen', StringType(), True),('attributes', ArrayType(FloatType(),True), True),('Date Type', DateType(), True),('timestamp', TimestampType(), True),('Cybertronian', BooleanType(), True),('function(binary)', BinaryType(), True),('NullType', NullType(), True)], [(10, "Optim'us", -28, 'Leader', 5000000, 4.300000190734863, ['Inochi', 'Convoy'], '19.442735,-99.201111', '1980/04/10', '2016/09/10', [8.53439998626709, 4300.0], datetime.date(2016, 9, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Leader'), None), (7, 'bumbl#ebéé ', 17, 'Espionage', 5000000, 2.0, ['Bumble', 'Goldback'], '10.642707,-71.612534', '1980/04/10', '2015/08/10', [5.334000110626221, 2000.0], datetime.date(2015, 8, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Espionage'), None), (7, 'ironhide&', 26, 'Security', 5000000, 4.0, ['Roadbuster'], '37.789563,-122.400356', '1980/04/10', '2014/07/10', [7.924799919128418, 4000.0], datetime.date(2014, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Security'), None), (8, 'Jazz', 13, 'First Lieutenant', 5000000, 1.7999999523162842, ['Meister'], '33.670666,-117.841553', '1980/04/10', '2013/06/10', [3.962399959564209, 1800.0], datetime.date(2013, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'First Lieutenant'), None), (10, 'Megatron', None, 'None', 5000000, 5.699999809265137, ['Megatron'], None, '1980/04/10', '2012/05/10', [None, 5700.0], datetime.date(2012, 5, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'None'), None), (8, 'Metroplex_)^$', 300, 'Battle Station', 5000000, None, ['Metroflex'], None, '1980/04/10', '2011/04/10', [91.44000244140625, None], datetime.date(2011, 4, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Battle Station'), None), (None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None)])
assert (expected_df.collect() == actual_df.collect())
@staticmethod
def test_cols_move_end():
actual_df =source_df.cols.move('rank','end')
expected_df = op.create.df([('names', StringType(), True),('height(ft)', ShortType(), True),('function', StringType(), True),('age', IntegerType(), True),('weight(t)', FloatType(), True),('japanese name', ArrayType(StringType(),True), True),('last position seen', StringType(), True),('date arrival', StringType(), True),('last date seen', StringType(), True),('attributes', ArrayType(FloatType(),True), True),('Date Type', DateType(), True),('timestamp', TimestampType(), True),('Cybertronian', BooleanType(), True),('function(binary)', BinaryType(), True),('NullType', NullType(), True),('rank', ByteType(), True)], [("Optim'us", -28, 'Leader', 5000000, 4.300000190734863, ['Inochi', 'Convoy'], '19.442735,-99.201111', '1980/04/10', '2016/09/10', [8.53439998626709, 4300.0], datetime.date(2016, 9, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Leader'), None, 10), ('bumbl#ebéé ', 17, 'Espionage', 5000000, 2.0, ['Bumble', 'Goldback'], '10.642707,-71.612534', '1980/04/10', '2015/08/10', [5.334000110626221, 2000.0], datetime.date(2015, 8, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Espionage'), None, 7), ('ironhide&', 26, 'Security', 5000000, 4.0, ['Roadbuster'], '37.789563,-122.400356', '1980/04/10', '2014/07/10', [7.924799919128418, 4000.0], datetime.date(2014, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Security'), None, 7), ('Jazz', 13, 'First Lieutenant', 5000000, 1.7999999523162842, ['Meister'], '33.670666,-117.841553', '1980/04/10', '2013/06/10', [3.962399959564209, 1800.0], datetime.date(2013, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'First Lieutenant'), None, 8), ('Megatron', None, 'None', 5000000, 5.699999809265137, ['Megatron'], None, '1980/04/10', '2012/05/10', [None, 5700.0], datetime.date(2012, 5, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'None'), None, 10), ('Metroplex_)^$', 300, 'Battle Station', 5000000, None, ['Metroflex'], None, '1980/04/10', '2011/04/10', [91.44000244140625, None], datetime.date(2011, 4, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Battle Station'), None, 8), (None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None)])
assert (expected_df.collect() == actual_df.collect())
@staticmethod
def test_cols_mul():
actual_df =source_df.cols.mul(['height(ft)','rank'])
expected_df = op.create.df([('names', StringType(), True),('height(ft)', FloatType(), True),('function', StringType(), True),('rank', FloatType(), True),('age', IntegerType(), True),('weight(t)', FloatType(), True),('japanese name', ArrayType(StringType(),True), True),('last position seen', StringType(), True),('date arrival', StringType(), True),('last date seen', StringType(), True),('attributes', ArrayType(FloatType(),True), True),('Date Type', DateType(), True),('timestamp', TimestampType(), True),('Cybertronian', BooleanType(), True),('function(binary)', BinaryType(), True),('NullType', NullType(), True),('mul', FloatType(), True)], [("Optim'us", -28.0, 'Leader', 10.0, 5000000, 4.300000190734863, ['Inochi', 'Convoy'], '19.442735,-99.201111', '1980/04/10', '2016/09/10', [8.53439998626709, 4300.0], datetime.date(2016, 9, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Leader'), None, -280.0), ('bumbl#ebéé ', 17.0, 'Espionage', 7.0, 5000000, 2.0, ['Bumble', 'Goldback'], '10.642707,-71.612534', '1980/04/10', '2015/08/10', [5.334000110626221, 2000.0], datetime.date(2015, 8, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Espionage'), None, 119.0), ('ironhide&', 26.0, 'Security', 7.0, 5000000, 4.0, ['Roadbuster'], '37.789563,-122.400356', '1980/04/10', '2014/07/10', [7.924799919128418, 4000.0], datetime.date(2014, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Security'), None, 182.0), ('Jazz', 13.0, 'First Lieutenant', 8.0, 5000000, 1.7999999523162842, ['Meister'], '33.670666,-117.841553', '1980/04/10', '2013/06/10', [3.962399959564209, 1800.0], datetime.date(2013, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'First Lieutenant'), None, 104.0), ('Megatron', None, 'None', 10.0, 5000000, 5.699999809265137, ['Megatron'], None, '1980/04/10', '2012/05/10', [None, 5700.0], datetime.date(2012, 5, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'None'), None, None), ('Metroplex_)^$', 300.0, 'Battle Station', 8.0, 5000000, None, ['Metroflex'], None, '1980/04/10', '2011/04/10', [91.44000244140625, None], datetime.date(2011, 4, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Battle Station'), None, 2400.0), (None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None)])
assert (expected_df.collect() == actual_df.collect())
@staticmethod
def test_cols_mul_all_columns():
actual_df =source_df.cols.mul('*')
expected_df = op.create.df([('names', StringType(), True),('height(ft)', FloatType(), True),('function', StringType(), True),('rank', FloatType(), True),('age', FloatType(), True),('weight(t)', FloatType(), True),('japanese name', ArrayType(StringType(),True), True),('last position seen', StringType(), True),('date arrival', StringType(), True),('last date seen', StringType(), True),('attributes', ArrayType(FloatType(),True), True),('Date Type', DateType(), True),('timestamp', TimestampType(), True),('Cybertronian', BooleanType(), True),('function(binary)', BinaryType(), True),('NullType', NullType(), True),('mul', FloatType(), True)], [("Optim'us", -28.0, 'Leader', 10.0, 5000000.0, 4.300000190734863, ['Inochi', 'Convoy'], '19.442735,-99.201111', '1980/04/10', '2016/09/10', [8.53439998626709, 4300.0], datetime.date(2016, 9, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Leader'), None, -6020000256.0), ('bumbl#ebéé ', 17.0, 'Espionage', 7.0, 5000000.0, 2.0, ['Bumble', 'Goldback'], '10.642707,-71.612534', '1980/04/10', '2015/08/10', [5.334000110626221, 2000.0], datetime.date(2015, 8, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Espionage'), None, 1190000000.0), ('ironhide&', 26.0, 'Security', 7.0, 5000000.0, 4.0, ['Roadbuster'], '37.789563,-122.400356', '1980/04/10', '2014/07/10', [7.924799919128418, 4000.0], datetime.date(2014, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Security'), None, 3640000000.0), ('Jazz', 13.0, 'First Lieutenant', 8.0, 5000000.0, 1.7999999523162842, ['Meister'], '33.670666,-117.841553', '1980/04/10', '2013/06/10', [3.962399959564209, 1800.0], datetime.date(2013, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'First Lieutenant'), None, 936000000.0), ('Megatron', None, 'None', 10.0, 5000000.0, 5.699999809265137, ['Megatron'], None, '1980/04/10', '2012/05/10', [None, 5700.0], datetime.date(2012, 5, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'None'), None, None), ('Metroplex_)^$', 300.0, 'Battle Station', 8.0, 5000000.0, None, ['Metroflex'], None, '1980/04/10', '2011/04/10', [91.44000244140625, None], datetime.date(2011, 4, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Battle Station'), None, None), (None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None)])
assert (expected_df.collect() == actual_df.collect())
@staticmethod
def test_cols_names():
actual_df =source_df.cols.names()
actual_df =json_enconding(actual_df)
expected_value =json_enconding(['names', 'height(ft)', 'function', 'rank', 'age', 'weight(t)', 'japanese name', 'last position seen', 'date arrival', 'last date seen', 'attributes', 'Date Type', 'timestamp', 'Cybertronian', 'function(binary)', 'NullType'])
assert (expected_value == actual_df)
@staticmethod
def test_cols_nest():
actual_df =source_df.cols.nest(['height(ft)','rank'],separator=' ',output_col='new col')
expected_df = op.create.df([('names', StringType(), True),('height(ft)', ShortType(), True),('function', StringType(), True),('rank', ByteType(), True),('age', IntegerType(), True),('weight(t)', FloatType(), True),('japanese name', ArrayType(StringType(),True), True),('last position seen', StringType(), True),('date arrival', StringType(), True),('last date seen', StringType(), True),('attributes', ArrayType(FloatType(),True), True),('Date Type', DateType(), True),('timestamp', TimestampType(), True),('Cybertronian', BooleanType(), True),('function(binary)', BinaryType(), True),('NullType', NullType(), True),('new col', StringType(), False)], [("Optim'us", -28, 'Leader', 10, 5000000, 4.300000190734863, ['Inochi', 'Convoy'], '19.442735,-99.201111', '1980/04/10', '2016/09/10', [8.53439998626709, 4300.0], datetime.date(2016, 9, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Leader'), None, '-28 10'), ('bumbl#ebéé ', 17, 'Espionage', 7, 5000000, 2.0, ['Bumble', 'Goldback'], '10.642707,-71.612534', '1980/04/10', '2015/08/10', [5.334000110626221, 2000.0], datetime.date(2015, 8, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Espionage'), None, '17 7'), ('ironhide&', 26, 'Security', 7, 5000000, 4.0, ['Roadbuster'], '37.789563,-122.400356', '1980/04/10', '2014/07/10', [7.924799919128418, 4000.0], datetime.date(2014, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Security'), None, '26 7'), ('Jazz', 13, 'First Lieutenant', 8, 5000000, 1.7999999523162842, ['Meister'], '33.670666,-117.841553', '1980/04/10', '2013/06/10', [3.962399959564209, 1800.0], datetime.date(2013, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'First Lieutenant'), None, '13 8'), ('Megatron', None, 'None', 10, 5000000, 5.699999809265137, ['Megatron'], None, '1980/04/10', '2012/05/10', [None, 5700.0], datetime.date(2012, 5, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'None'), None, '10'), ('Metroplex_)^$', 300, 'Battle Station', 8, 5000000, None, ['Metroflex'], None, '1980/04/10', '2011/04/10', [91.44000244140625, None], datetime.date(2011, 4, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Battle Station'), None, '300 8'), (None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, '')])
assert (expected_df.collect() == actual_df.collect())
@staticmethod
def test_cols_nest_array():
actual_df =source_df.cols.nest(['height(ft)','rank','rank'],shape='array',output_col='new col')
expected_df = op.create.df([('names', StringType(), True),('height(ft)', StringType(), True),('function', StringType(), True),('rank', StringType(), True),('age', StringType(), True),('weight(t)', StringType(), True),('japanese name', StringType(), True),('last position seen', StringType(), True),('date arrival', StringType(), True),('last date seen', StringType(), True),('attributes', StringType(), True),('Date Type', StringType(), True),('timestamp', StringType(), True),('Cybertronian', StringType(), True),('function(binary)', StringType(), True),('NullType', StringType(), True),('new col', ArrayType(StringType(),True), False)], [("Optim'us", '-28', 'Leader', '10', '5000000', '4.3', '[Inochi, Convoy]', '19.442735,-99.201111', '1980/04/10', '2016/09/10', '[8.5344, 4300.0]', '2016-09-10', '2014-06-24 00:00:00', 'true', 'Leader', None, ['-28', '10', '10']), ('bumbl#ebéé ', '17', 'Espionage', '7', '5000000', '2.0', '[Bumble, Goldback]', '10.642707,-71.612534', '1980/04/10', '2015/08/10', '[5.334, 2000.0]', '2015-08-10', '2014-06-24 00:00:00', 'true', 'Espionage', None, ['17', '7', '7']), ('ironhide&', '26', 'Security', '7', '5000000', '4.0', '[Roadbuster]', '37.789563,-122.400356', '1980/04/10', '2014/07/10', '[7.9248, 4000.0]', '2014-06-24', '2014-06-24 00:00:00', 'true', 'Security', None, ['26', '7', '7']), ('Jazz', '13', 'First Lieutenant', '8', '5000000', '1.8', '[Meister]', '33.670666,-117.841553', '1980/04/10', '2013/06/10', '[3.9624, 1800.0]', '2013-06-24', '2014-06-24 00:00:00', 'true', 'First Lieutenant', None, ['13', '8', '8']), ('Megatron', None, 'None', '10', '5000000', '5.7', '[Megatron]', None, '1980/04/10', '2012/05/10', '[, 5700.0]', '2012-05-10', '2014-06-24 00:00:00', 'true', 'None', None, [None, '10', '10']), ('Metroplex_)^$', '300', 'Battle Station', '8', '5000000', None, '[Metroflex]', None, '1980/04/10', '2011/04/10', '[91.44,]', '2011-04-10', '2014-06-24 00:00:00', 'true', 'Battle Station', None, ['300', '8', '8']), (None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, [None, None, None])])
assert (expected_df.collect() == actual_df.collect())
@staticmethod
def test_cols_nest_vector():
source_df=op.create.df([('names', StringType(), True),('height(ft)', ShortType(), True),('function', StringType(), True),('rank', ByteType(), True),('age', IntegerType(), True),('weight(t)', FloatType(), True),('japanese name', ArrayType(StringType(),True), True),('last position seen', StringType(), True),('date arrival', StringType(), True),('last date seen', StringType(), True),('attributes', ArrayType(FloatType(),True), True),('Date Type', DateType(), True),('timestamp', TimestampType(), True),('Cybertronian', BooleanType(), True),('function(binary)', BinaryType(), True)], [("Optim'us", -28, 'Leader', 10, 5000000, 4.300000190734863, ['Inochi', 'Convoy'], '19.442735,-99.201111', '1980/04/10', '2016/09/10', [8.53439998626709, 4300.0], datetime.date(2016, 9, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Leader')), ('bumbl#ebéé ', 17, 'Espionage', 7, 5000000, 2.0, ['Bumble', 'Goldback'], '10.642707,-71.612534', '1980/04/10', '2015/08/10', [5.334000110626221, 2000.0], datetime.date(2015, 8, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Espionage')), ('ironhide&', 26, 'Security', 7, 5000000, 4.0, ['Roadbuster'], '37.789563,-122.400356', '1980/04/10', '2014/07/10', [7.924799919128418, 4000.0], datetime.date(2014, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Security')), ('Jazz', 13, 'First Lieutenant', 8, 5000000, 1.7999999523162842, ['Meister'], '33.670666,-117.841553', '1980/04/10', '2013/06/10', [3.962399959564209, 1800.0], datetime.date(2013, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'First Lieutenant'))])
actual_df =source_df.cols.nest(['rank','rank'],shape='vector',output_col='new col')
expected_df = op.create.df([('names', StringType(), True),('height(ft)', ShortType(), True),('function', StringType(), True),('rank', ByteType(), True),('age', IntegerType(), True),('weight(t)', FloatType(), True),('japanese name', ArrayType(StringType(),True), True),('last position seen', StringType(), True),('date arrival', StringType(), True),('last date seen', StringType(), True),('attributes', ArrayType(FloatType(),True), True),('Date Type', DateType(), True),('timestamp', TimestampType(), True),('Cybertronian', BooleanType(), True),('function(binary)', BinaryType(), True),('new col', VectorUDT(), True)], [("Optim'us", -28, 'Leader', 10, 5000000, 4.300000190734863, ['Inochi', 'Convoy'], '19.442735,-99.201111', '1980/04/10', '2016/09/10', [8.53439998626709, 4300.0], datetime.date(2016, 9, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Leader'), DenseVector([10.0])), ('bumbl#ebéé ', 17, 'Espionage', 7, 5000000, 2.0, ['Bumble', 'Goldback'], '10.642707,-71.612534', '1980/04/10', '2015/08/10', [5.334000110626221, 2000.0], datetime.date(2015, 8, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Espionage'), DenseVector([7.0])), ('ironhide&', 26, 'Security', 7, 5000000, 4.0, ['Roadbuster'], '37.789563,-122.400356', '1980/04/10', '2014/07/10', [7.924799919128418, 4000.0], datetime.date(2014, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Security'), DenseVector([7.0])), ('Jazz', 13, 'First Lieutenant', 8, 5000000, 1.7999999523162842, ['Meister'], '33.670666,-117.841553', '1980/04/10', '2013/06/10', [3.962399959564209, 1800.0], datetime.date(2013, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'First Lieutenant'), DenseVector([8.0]))])
assert (expected_df.collect() == actual_df.collect())
@staticmethod
def test_cols_nest_vector_all_columns():
source_df=op.create.df([('names', StringType(), True),('height(ft)', ShortType(), True),('function', StringType(), True),('rank', ByteType(), True),('age', IntegerType(), True),('weight(t)', FloatType(), True),('japanese name', ArrayType(StringType(),True), True),('last position seen', StringType(), True),('date arrival', StringType(), True),('last date seen', StringType(), True),('attributes', ArrayType(FloatType(),True), True),('Date Type', DateType(), True),('timestamp', TimestampType(), True),('Cybertronian', BooleanType(), True),('function(binary)', BinaryType(), True)], [("Optim'us", -28, 'Leader', 10, 5000000, 4.300000190734863, ['Inochi', 'Convoy'], '19.442735,-99.201111', '1980/04/10', '2016/09/10', [8.53439998626709, 4300.0], datetime.date(2016, 9, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Leader')), ('bumbl#ebéé ', 17, 'Espionage', 7, 5000000, 2.0, ['Bumble', 'Goldback'], '10.642707,-71.612534', '1980/04/10', '2015/08/10', [5.334000110626221, 2000.0], datetime.date(2015, 8, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Espionage')), ('ironhide&', 26, 'Security', 7, 5000000, 4.0, ['Roadbuster'], '37.789563,-122.400356', '1980/04/10', '2014/07/10', [7.924799919128418, 4000.0], datetime.date(2014, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Security')), ('Jazz', 13, 'First Lieutenant', 8, 5000000, 1.7999999523162842, ['Meister'], '33.670666,-117.841553', '1980/04/10', '2013/06/10', [3.962399959564209, 1800.0], datetime.date(2013, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'First Lieutenant'))])
actual_df =source_df.cols.nest(['rank','rank'],shape='vector',output_col='new col')
expected_df = op.create.df([('names', StringType(), True),('height(ft)', ShortType(), True),('function', StringType(), True),('rank', ByteType(), True),('age', IntegerType(), True),('weight(t)', FloatType(), True),('japanese name', ArrayType(StringType(),True), True),('last position seen', StringType(), True),('date arrival', StringType(), True),('last date seen', StringType(), True),('attributes', ArrayType(FloatType(),True), True),('Date Type', DateType(), True),('timestamp', TimestampType(), True),('Cybertronian', BooleanType(), True),('function(binary)', BinaryType(), True),('new col', VectorUDT(), True)], [("Optim'us", -28, 'Leader', 10, 5000000, 4.300000190734863, ['Inochi', 'Convoy'], '19.442735,-99.201111', '1980/04/10', '2016/09/10', [8.53439998626709, 4300.0], datetime.date(2016, 9, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Leader'), DenseVector([10.0])), ('bumbl#ebéé ', 17, 'Espionage', 7, 5000000, 2.0, ['Bumble', 'Goldback'], '10.642707,-71.612534', '1980/04/10', '2015/08/10', [5.334000110626221, 2000.0], datetime.date(2015, 8, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Espionage'), DenseVector([7.0])), ('ironhide&', 26, 'Security', 7, 5000000, 4.0, ['Roadbuster'], '37.789563,-122.400356', '1980/04/10', '2014/07/10', [7.924799919128418, 4000.0], datetime.date(2014, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Security'), DenseVector([7.0])), ('Jazz', 13, 'First Lieutenant', 8, 5000000, 1.7999999523162842, ['Meister'], '33.670666,-117.841553', '1980/04/10', '2013/06/10', [3.962399959564209, 1800.0], datetime.date(2013, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'First Lieutenant'), DenseVector([8.0]))])
assert (expected_df.collect() == actual_df.collect())
@staticmethod
def test_cols_percentile():
actual_df =source_df.cols.percentile('height(ft)',[0.05,0.25],1)
actual_df =json_enconding(actual_df)
expected_value =json_enconding({'height(ft)': {'percentile': {'0.05': -28, '0.25': -28}}})
assert (expected_value == actual_df)
@staticmethod
def test_cols_percentile_all_columns():
actual_df =source_df.cols.percentile('*',[0.05,0.25],1)
actual_df =json_enconding(actual_df)
expected_value =json_enconding({'height(ft)': {'percentile': {'0.05': -28, '0.25': -28}}, 'rank': {'percentile': {'0.05': 7, '0.25': 7}}, 'age': {'percentile': {'0.05': 5000000, '0.25': 5000000}}, 'weight(t)': {'percentile': {'0.05': 1.7999999523162842, '0.25': 1.7999999523162842}}})
assert (expected_value == actual_df)
@staticmethod
def test_cols_qcut():
actual_df =source_df.cols.qcut('rank',4)
expected_df = op.create.df([('names', StringType(), True),('height(ft)', ShortType(), True),('function', StringType(), True),('rank', ByteType(), True),('age', IntegerType(), True),('weight(t)', FloatType(), True),('japanese name', ArrayType(StringType(),True), True),('last position seen', StringType(), True),('date arrival', StringType(), True),('last date seen', StringType(), True),('attributes', ArrayType(FloatType(),True), True),('Date Type', DateType(), True),('timestamp', TimestampType(), True),('Cybertronian', BooleanType(), True),('function(binary)', BinaryType(), True),('NullType', NullType(), True),('rank_qcut', DoubleType(), True)], [("Optim'us", -28, 'Leader', 10, 5000000, 4.300000190734863, ['Inochi', 'Convoy'], '19.442735,-99.201111', '1980/04/10', '2016/09/10', [8.53439998626709, 4300.0], datetime.date(2016, 9, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Leader'), None, 3.0), ('bumbl#ebéé ', 17, 'Espionage', 7, 5000000, 2.0, ['Bumble', 'Goldback'], '10.642707,-71.612534', '1980/04/10', '2015/08/10', [5.334000110626221, 2000.0], datetime.date(2015, 8, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Espionage'), None, 1.0), ('ironhide&', 26, 'Security', 7, 5000000, 4.0, ['Roadbuster'], '37.789563,-122.400356', '1980/04/10', '2014/07/10', [7.924799919128418, 4000.0], datetime.date(2014, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Security'), None, 1.0), ('Jazz', 13, 'First Lieutenant', 8, 5000000, 1.7999999523162842, ['Meister'], '33.670666,-117.841553', '1980/04/10', '2013/06/10', [3.962399959564209, 1800.0], datetime.date(2013, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'First Lieutenant'), None, 2.0), ('Megatron', None, 'None', 10, 5000000, 5.699999809265137, ['Megatron'], None, '1980/04/10', '2012/05/10', [None, 5700.0], datetime.date(2012, 5, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'None'), None, 3.0), ('Metroplex_)^$', 300, 'Battle Station', 8, 5000000, None, ['Metroflex'], None, '1980/04/10', '2011/04/10', [91.44000244140625, None], datetime.date(2011, 4, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Battle Station'), None, 2.0)])
assert (expected_df.collect() == actual_df.collect())
@staticmethod
def test_cols_qcut_all_columns():
actual_df =source_df.cols.qcut('*',4)
expected_df = op.create.df([('names', StringType(), True),('height(ft)', ShortType(), True),('function', StringType(), True),('rank', ByteType(), True),('age', IntegerType(), True),('weight(t)', FloatType(), True),('japanese name', ArrayType(StringType(),True), True),('last position seen', StringType(), True),('date arrival', StringType(), True),('last date seen', StringType(), True),('attributes', ArrayType(FloatType(),True), True),('Date Type', DateType(), True),('timestamp', TimestampType(), True),('Cybertronian', BooleanType(), True),('function(binary)', BinaryType(), True),('NullType', NullType(), True),('height(ft)_qcut', DoubleType(), True),('rank_qcut', DoubleType(), True),('age_qcut', DoubleType(), True),('weight(t)_qcut', DoubleType(), True)], [("Optim'us", -28, 'Leader', 10, 5000000, 4.300000190734863, ['Inochi', 'Convoy'], '19.442735,-99.201111', '1980/04/10', '2016/09/10', [8.53439998626709, 4300.0], datetime.date(2016, 9, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Leader'), None, 0.0, 2.0, 1.0, 3.0), ('bumbl#ebéé ', 17, 'Espionage', 7, 5000000, 2.0, ['Bumble', 'Goldback'], '10.642707,-71.612534', '1980/04/10', '2015/08/10', [5.334000110626221, 2000.0], datetime.date(2015, 8, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Espionage'), None, 2.0, 1.0, 1.0, 2.0), ('ironhide&', 26, 'Security', 7, 5000000, 4.0, ['Roadbuster'], '37.789563,-122.400356', '1980/04/10', '2014/07/10', [7.924799919128418, 4000.0], datetime.date(2014, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Security'), None, 3.0, 1.0, 1.0, 3.0), ('Jazz', 13, 'First Lieutenant', 8, 5000000, 1.7999999523162842, ['Meister'], '33.670666,-117.841553', '1980/04/10', '2013/06/10', [3.962399959564209, 1800.0], datetime.date(2013, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'First Lieutenant'), None, 1.0, 2.0, 1.0, 1.0)])
assert (expected_df.collect() == actual_df.collect())
@staticmethod
def test_cols_range():
actual_df =source_df.cols.range('height(ft)')
actual_df =json_enconding(actual_df)
expected_value =json_enconding({'height(ft)': {'range': {'max': 300, 'min': -28}}})
assert (expected_value == actual_df)
@staticmethod
def test_cols_range_all_columns():
actual_df =source_df.cols.range('*')
actual_df =json_enconding(actual_df)
expected_value =json_enconding({'names': {'range': {'max': 'ironhide&', 'min': 'Jazz'}}, 'height(ft)': {'range': {'max': 300, 'min': -28}}, 'function': {'range': {'max': 'Security', 'min': 'Battle Station'}}, 'rank': {'range': {'max': 10, 'min': 7}}, 'age': {'range': {'max': 5000000, 'min': 5000000}}, 'weight(t)': {'range': {'max': 5.699999809265137, 'min': 1.7999999523162842}}, 'japanese name': {'range': {'max': ['Roadbuster'], 'min': ['Bumble', 'Goldback']}}, 'last position seen': {'range': {'max': '37.789563,-122.400356', 'min': '10.642707,-71.612534'}}, 'date arrival': {'range': {'max': '1980/04/10', 'min': '1980/04/10'}}, 'last date seen': {'range': {'max': '2016/09/10', 'min': '2011/04/10'}}, 'attributes': {'range': {'max': [91.44000244140625, None], 'min': [None, 5700.0]}}, 'Date Type': {'range': {'max': '2016-09-10', 'min': '2011-04-10'}}, 'timestamp': {'range': {'max': '2014-06-24 00:00:00', 'min': '2014-06-24 00:00:00'}}, 'Cybertronian': {'range': {'max': True, 'min': True}}, 'function(binary)': {'range': {'max': None, 'min': None}}, 'NullType': {'range': {'max': None, 'min': None}}})
assert (expected_value == actual_df)
@staticmethod
def test_cols_remove():
actual_df =source_df.cols.remove('function','i')
expected_df = op.create.df([('names', StringType(), True),('height(ft)', ShortType(), True),('function', StringType(), True),('rank', ByteType(), True),('age', IntegerType(), True),('weight(t)', FloatType(), True),('japanese name', ArrayType(StringType(),True), True),('last position seen', StringType(), True),('date arrival', StringType(), True),('last date seen', StringType(), True),('attributes', ArrayType(FloatType(),True), True),('Date Type', DateType(), True),('timestamp', TimestampType(), True),('Cybertronian', BooleanType(), True),('function(binary)', BinaryType(), True),('NullType', NullType(), True)], [("Optim'us", -28, 'Leader', 10, 5000000, 4.300000190734863, ['Inochi', 'Convoy'], '19.442735,-99.201111', '1980/04/10', '2016/09/10', [8.53439998626709, 4300.0], datetime.date(2016, 9, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Leader'), None), ('bumbl#ebéé ', 17, 'Esponage', 7, 5000000, 2.0, ['Bumble', 'Goldback'], '10.642707,-71.612534', '1980/04/10', '2015/08/10', [5.334000110626221, 2000.0], datetime.date(2015, 8, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Espionage'), None), ('ironhide&', 26, 'Securty', 7, 5000000, 4.0, ['Roadbuster'], '37.789563,-122.400356', '1980/04/10', '2014/07/10', [7.924799919128418, 4000.0], datetime.date(2014, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Security'), None), ('Jazz', 13, 'Frst Leutenant', 8, 5000000, 1.7999999523162842, ['Meister'], '33.670666,-117.841553', '1980/04/10', '2013/06/10', [3.962399959564209, 1800.0], datetime.date(2013, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'First Lieutenant'), None), ('Megatron', None, 'None', 10, 5000000, 5.699999809265137, ['Megatron'], None, '1980/04/10', '2012/05/10', [None, 5700.0], datetime.date(2012, 5, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'None'), None), ('Metroplex_)^$', 300, 'Battle Staton', 8, 5000000, None, ['Metroflex'], None, '1980/04/10', '2011/04/10', [91.44000244140625, None], datetime.date(2011, 4, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Battle Station'), None), (None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None)])
assert (expected_df.collect() == actual_df.collect())
@staticmethod
def test_cols_remove_accents():
actual_df =source_df.cols.remove_accents('function')
expected_df = op.create.df([('names', StringType(), True),('height(ft)', ShortType(), True),('function', StringType(), True),('rank', ByteType(), True),('age', IntegerType(), True),('weight(t)', FloatType(), True),('japanese name', ArrayType(StringType(),True), True),('last position seen', StringType(), True),('date arrival', StringType(), True),('last date seen', StringType(), True),('attributes', ArrayType(FloatType(),True), True),('Date Type', DateType(), True),('timestamp', TimestampType(), True),('Cybertronian', BooleanType(), True),('function(binary)', BinaryType(), True),('NullType', NullType(), True)], [("Optim'us", -28, 'Leader', 10, 5000000, 4.300000190734863, ['Inochi', 'Convoy'], '19.442735,-99.201111', '1980/04/10', '2016/09/10', [8.53439998626709, 4300.0], datetime.date(2016, 9, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Leader'), None), ('bumbl#ebéé ', 17, 'Espionage', 7, 5000000, 2.0, ['Bumble', 'Goldback'], '10.642707,-71.612534', '1980/04/10', '2015/08/10', [5.334000110626221, 2000.0], datetime.date(2015, 8, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Espionage'), None), ('ironhide&', 26, 'Security', 7, 5000000, 4.0, ['Roadbuster'], '37.789563,-122.400356', '1980/04/10', '2014/07/10', [7.924799919128418, 4000.0], datetime.date(2014, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Security'), None), ('Jazz', 13, 'First Lieutenant', 8, 5000000, 1.7999999523162842, ['Meister'], '33.670666,-117.841553', '1980/04/10', '2013/06/10', [3.962399959564209, 1800.0], datetime.date(2013, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'First Lieutenant'), None), ('Megatron', None, 'None', 10, 5000000, 5.699999809265137, ['Megatron'], None, '1980/04/10', '2012/05/10', [None, 5700.0], datetime.date(2012, 5, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'None'), None), ('Metroplex_)^$', 300, 'Battle Station', 8, 5000000, None, ['Metroflex'], None, '1980/04/10', '2011/04/10', [91.44000244140625, None], datetime.date(2011, 4, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Battle Station'), None), (None, None, 'None', None, None, None, None, None, None, None, None, None, None, None, None, None)])
assert (expected_df.collect() == actual_df.collect())
@staticmethod
def test_cols_remove_accents_all_columns():
actual_df =source_df.cols.remove_accents('function')
expected_df = op.create.df([('names', StringType(), True),('height(ft)', ShortType(), True),('function', StringType(), True),('rank', ByteType(), True),('age', IntegerType(), True),('weight(t)', FloatType(), True),('japanese name', ArrayType(StringType(),True), True),('last position seen', StringType(), True),('date arrival', StringType(), True),('last date seen', StringType(), True),('attributes', ArrayType(FloatType(),True), True),('Date Type', DateType(), True),('timestamp', TimestampType(), True),('Cybertronian', BooleanType(), True),('function(binary)', BinaryType(), True),('NullType', NullType(), True)], [("Optim'us", -28, 'Leader', 10, 5000000, 4.300000190734863, ['Inochi', 'Convoy'], '19.442735,-99.201111', '1980/04/10', '2016/09/10', [8.53439998626709, 4300.0], datetime.date(2016, 9, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Leader'), None), ('bumbl#ebéé ', 17, 'Espionage', 7, 5000000, 2.0, ['Bumble', 'Goldback'], '10.642707,-71.612534', '1980/04/10', '2015/08/10', [5.334000110626221, 2000.0], datetime.date(2015, 8, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Espionage'), None), ('ironhide&', 26, 'Security', 7, 5000000, 4.0, ['Roadbuster'], '37.789563,-122.400356', '1980/04/10', '2014/07/10', [7.924799919128418, 4000.0], datetime.date(2014, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Security'), None), ('Jazz', 13, 'First Lieutenant', 8, 5000000, 1.7999999523162842, ['Meister'], '33.670666,-117.841553', '1980/04/10', '2013/06/10', [3.962399959564209, 1800.0], datetime.date(2013, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'First Lieutenant'), None), ('Megatron', None, 'None', 10, 5000000, 5.699999809265137, ['Megatron'], None, '1980/04/10', '2012/05/10', [None, 5700.0], datetime.date(2012, 5, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'None'), None), ('Metroplex_)^$', 300, 'Battle Station', 8, 5000000, None, ['Metroflex'], None, '1980/04/10', '2011/04/10', [91.44000244140625, None], datetime.date(2011, 4, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Battle Station'), None), (None, None, 'None', None, None, None, None, None, None, None, None, None, None, None, None, None)])
assert (expected_df.collect() == actual_df.collect())
@staticmethod
def test_cols_remove_list():
actual_df =source_df.cols.remove('function',['a','i','Es'])
expected_df = op.create.df([('names', StringType(), True),('height(ft)', ShortType(), True),('function', StringType(), True),('rank', ByteType(), True),('age', IntegerType(), True),('weight(t)', FloatType(), True),('japanese name', ArrayType(StringType(),True), True),('last position seen', StringType(), True),('date arrival', StringType(), True),('last date seen', StringType(), True),('attributes', ArrayType(FloatType(),True), True),('Date Type', DateType(), True),('timestamp', TimestampType(), True),('Cybertronian', BooleanType(), True),('function(binary)', BinaryType(), True),('NullType', NullType(), True)], [("Optim'us", -28, 'Leder', 10, 5000000, 4.300000190734863, ['Inochi', 'Convoy'], '19.442735,-99.201111', '1980/04/10', '2016/09/10', [8.53439998626709, 4300.0], datetime.date(2016, 9, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Leader'), None), ('bumbl#ebéé ', 17, 'ponge', 7, 5000000, 2.0, ['Bumble', 'Goldback'], '10.642707,-71.612534', '1980/04/10', '2015/08/10', [5.334000110626221, 2000.0], datetime.date(2015, 8, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Espionage'), None), ('ironhide&', 26, 'Securty', 7, 5000000, 4.0, ['Roadbuster'], '37.789563,-122.400356', '1980/04/10', '2014/07/10', [7.924799919128418, 4000.0], datetime.date(2014, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Security'), None), ('Jazz', 13, 'Frst Leutennt', 8, 5000000, 1.7999999523162842, ['Meister'], '33.670666,-117.841553', '1980/04/10', '2013/06/10', [3.962399959564209, 1800.0], datetime.date(2013, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'First Lieutenant'), None), ('Megatron', None, 'None', 10, 5000000, 5.699999809265137, ['Megatron'], None, '1980/04/10', '2012/05/10', [None, 5700.0], datetime.date(2012, 5, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'None'), None), ('Metroplex_)^$', 300, 'Bttle Stton', 8, 5000000, None, ['Metroflex'], None, '1980/04/10', '2011/04/10', [91.44000244140625, None], datetime.date(2011, 4, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Battle Station'), None), (None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None)])
assert (expected_df.collect() == actual_df.collect())
@staticmethod
def test_cols_remove_list_output():
actual_df =source_df.cols.remove('function',['a','i','Es'],output_cols='function_new')
expected_df = op.create.df([('names', StringType(), True),('height(ft)', ShortType(), True),('function', StringType(), True),('rank', ByteType(), True),('age', IntegerType(), True),('weight(t)', FloatType(), True),('japanese name', ArrayType(StringType(),True), True),('last position seen', StringType(), True),('date arrival', StringType(), True),('last date seen', StringType(), True),('attributes', ArrayType(FloatType(),True), True),('Date Type', DateType(), True),('timestamp', TimestampType(), True),('Cybertronian', BooleanType(), True),('function(binary)', BinaryType(), True),('NullType', NullType(), True),('function_new', StringType(), True)], [("Optim'us", -28, 'Leader', 10, 5000000, 4.300000190734863, ['Inochi', 'Convoy'], '19.442735,-99.201111', '1980/04/10', '2016/09/10', [8.53439998626709, 4300.0], datetime.date(2016, 9, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Leader'), None, 'Leder'), ('bumbl#ebéé ', 17, 'Espionage', 7, 5000000, 2.0, ['Bumble', 'Goldback'], '10.642707,-71.612534', '1980/04/10', '2015/08/10', [5.334000110626221, 2000.0], datetime.date(2015, 8, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Espionage'), None, 'ponge'), ('ironhide&', 26, 'Security', 7, 5000000, 4.0, ['Roadbuster'], '37.789563,-122.400356', '1980/04/10', '2014/07/10', [7.924799919128418, 4000.0], datetime.date(2014, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Security'), None, 'Securty'), ('Jazz', 13, 'First Lieutenant', 8, 5000000, 1.7999999523162842, ['Meister'], '33.670666,-117.841553', '1980/04/10', '2013/06/10', [3.962399959564209, 1800.0], datetime.date(2013, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'First Lieutenant'), None, 'Frst Leutennt'), ('Megatron', None, 'None', 10, 5000000, 5.699999809265137, ['Megatron'], None, '1980/04/10', '2012/05/10', [None, 5700.0], datetime.date(2012, 5, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'None'), None, 'None'), ('Metroplex_)^$', 300, 'Battle Station', 8, 5000000, None, ['Metroflex'], None, '1980/04/10', '2011/04/10', [91.44000244140625, None], datetime.date(2011, 4, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Battle Station'), None, 'Bttle Stton'), (None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None)])
assert (expected_df.collect() == actual_df.collect())
@staticmethod
def test_cols_remove_special_chars():
actual_df =source_df.cols.remove_special_chars('function')
expected_df = op.create.df([('names', StringType(), True),('height(ft)', ShortType(), True),('function', StringType(), True),('rank', ByteType(), True),('age', IntegerType(), True),('weight(t)', FloatType(), True),('japanese name', ArrayType(StringType(),True), True),('last position seen', StringType(), True),('date arrival', StringType(), True),('last date seen', StringType(), True),('attributes', ArrayType(FloatType(),True), True),('Date Type', DateType(), True),('timestamp', TimestampType(), True),('Cybertronian', BooleanType(), True),('function(binary)', BinaryType(), True),('NullType', NullType(), True)], [("Optim'us", -28, 'Leader', 10, 5000000, 4.300000190734863, ['Inochi', 'Convoy'], '19.442735,-99.201111', '1980/04/10', '2016/09/10', [8.53439998626709, 4300.0], datetime.date(2016, 9, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Leader'), None), ('bumbl#ebéé ', 17, 'Espionage', 7, 5000000, 2.0, ['Bumble', 'Goldback'], '10.642707,-71.612534', '1980/04/10', '2015/08/10', [5.334000110626221, 2000.0], datetime.date(2015, 8, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Espionage'), None), ('ironhide&', 26, 'Security', 7, 5000000, 4.0, ['Roadbuster'], '37.789563,-122.400356', '1980/04/10', '2014/07/10', [7.924799919128418, 4000.0], datetime.date(2014, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Security'), None), ('Jazz', 13, 'First Lieutenant', 8, 5000000, 1.7999999523162842, ['Meister'], '33.670666,-117.841553', '1980/04/10', '2013/06/10', [3.962399959564209, 1800.0], datetime.date(2013, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'First Lieutenant'), None), ('Megatron', None, 'None', 10, 5000000, 5.699999809265137, ['Megatron'], None, '1980/04/10', '2012/05/10', [None, 5700.0], datetime.date(2012, 5, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'None'), None), ('Metroplex_)^$', 300, 'Battle Station', 8, 5000000, None, ['Metroflex'], None, '1980/04/10', '2011/04/10', [91.44000244140625, None], datetime.date(2011, 4, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Battle Station'), None), (None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None)])
assert (expected_df.collect() == actual_df.collect())
@staticmethod
def test_cols_remove_special_chars_all_columns():
actual_df =source_df.cols.remove_special_chars('*')
expected_df = op.create.df([('names', StringType(), True),('height(ft)', ShortType(), True),('function', StringType(), True),('rank', ByteType(), True),('age', IntegerType(), True),('weight(t)', FloatType(), True),('japanese name', ArrayType(StringType(),True), True),('last position seen', StringType(), True),('date arrival', StringType(), True),('last date seen', StringType(), True),('attributes', ArrayType(FloatType(),True), True),('Date Type', DateType(), True),('timestamp', TimestampType(), True),('Cybertronian', BooleanType(), True),('function(binary)', BinaryType(), True),('NullType', NullType(), True)], [('Optimus', -28, 'Leader', 10, 5000000, 4.300000190734863, ['Inochi', 'Convoy'], '1944273599201111', '19800410', '20160910', [8.53439998626709, 4300.0], datetime.date(2016, 9, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Leader'), None), ('bumblebéé ', 17, 'Espionage', 7, 5000000, 2.0, ['Bumble', 'Goldback'], '1064270771612534', '19800410', '20150810', [5.334000110626221, 2000.0], datetime.date(2015, 8, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Espionage'), None), ('ironhide', 26, 'Security', 7, 5000000, 4.0, ['Roadbuster'], '37789563122400356', '19800410', '20140710', [7.924799919128418, 4000.0], datetime.date(2014, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Security'), None), ('Jazz', 13, 'First Lieutenant', 8, 5000000, 1.7999999523162842, ['Meister'], '33670666117841553', '19800410', '20130610', [3.962399959564209, 1800.0], datetime.date(2013, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'First Lieutenant'), None), ('Megatron', None, 'None', 10, 5000000, 5.699999809265137, ['Megatron'], None, '19800410', '20120510', [None, 5700.0], datetime.date(2012, 5, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'None'), None), ('Metroplex', 300, 'Battle Station', 8, 5000000, None, ['Metroflex'], None, '19800410', '20110410', [91.44000244140625, None], datetime.date(2011, 4, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Battle Station'), None), (None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None)])
assert (expected_df.collect() == actual_df.collect())
@staticmethod
def test_cols_remove_white_spaces():
actual_df =source_df.cols.remove_white_spaces('function')
expected_df = op.create.df([('names', StringType(), True),('height(ft)', ShortType(), True),('function', StringType(), True),('rank', ByteType(), True),('age', IntegerType(), True),('weight(t)', FloatType(), True),('japanese name', ArrayType(StringType(),True), True),('last position seen', StringType(), True),('date arrival', StringType(), True),('last date seen', StringType(), True),('attributes', ArrayType(FloatType(),True), True),('Date Type', DateType(), True),('timestamp', TimestampType(), True),('Cybertronian', BooleanType(), True),('function(binary)', BinaryType(), True),('NullType', NullType(), True)], [("Optim'us", -28, 'Leader', 10, 5000000, 4.300000190734863, ['Inochi', 'Convoy'], '19.442735,-99.201111', '1980/04/10', '2016/09/10', [8.53439998626709, 4300.0], datetime.date(2016, 9, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Leader'), None), ('bumbl#ebéé ', 17, 'Espionage', 7, 5000000, 2.0, ['Bumble', 'Goldback'], '10.642707,-71.612534', '1980/04/10', '2015/08/10', [5.334000110626221, 2000.0], datetime.date(2015, 8, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Espionage'), None), ('ironhide&', 26, 'Security', 7, 5000000, 4.0, ['Roadbuster'], '37.789563,-122.400356', '1980/04/10', '2014/07/10', [7.924799919128418, 4000.0], datetime.date(2014, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Security'), None), ('Jazz', 13, 'FirstLieutenant', 8, 5000000, 1.7999999523162842, ['Meister'], '33.670666,-117.841553', '1980/04/10', '2013/06/10', [3.962399959564209, 1800.0], datetime.date(2013, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'First Lieutenant'), None), ('Megatron', None, 'None', 10, 5000000, 5.699999809265137, ['Megatron'], None, '1980/04/10', '2012/05/10', [None, 5700.0], datetime.date(2012, 5, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'None'), None), ('Metroplex_)^$', 300, 'BattleStation', 8, 5000000, None, ['Metroflex'], None, '1980/04/10', '2011/04/10', [91.44000244140625, None], datetime.date(2011, 4, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Battle Station'), None), (None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None)])
assert (expected_df.collect() == actual_df.collect())
@staticmethod
def test_cols_remove_white_spaces_all_columns():
actual_df =source_df.cols.remove_white_spaces('*')
expected_df = op.create.df([('names', StringType(), True),('height(ft)', StringType(), True),('function', StringType(), True),('rank', StringType(), True),('age', StringType(), True),('weight(t)', StringType(), True),('japanese name', ArrayType(StringType(),True), True),('last position seen', StringType(), True),('date arrival', StringType(), True),('last date seen', StringType(), True),('attributes', ArrayType(FloatType(),True), True),('Date Type', StringType(), True),('timestamp', TimestampType(), True),('Cybertronian', StringType(), True),('function(binary)', BinaryType(), True),('NullType', NullType(), True)], [("Optim'us", '-28', 'Leader', '10', '5000000', '4.3', ['Inochi', 'Convoy'], '19.442735,-99.201111', '1980/04/10', '2016/09/10', [8.53439998626709, 4300.0], '2016-09-10', datetime.datetime(2014, 6, 24, 0, 0), 'true', bytearray(b'Leader'), None), ('bumbl#ebéé', '17', 'Espionage', '7', '5000000', '2.0', ['Bumble', 'Goldback'], '10.642707,-71.612534', '1980/04/10', '2015/08/10', [5.334000110626221, 2000.0], '2015-08-10', datetime.datetime(2014, 6, 24, 0, 0), 'true', bytearray(b'Espionage'), None), ('ironhide&', '26', 'Security', '7', '5000000', '4.0', ['Roadbuster'], '37.789563,-122.400356', '1980/04/10', '2014/07/10', [7.924799919128418, 4000.0], '2014-06-24', datetime.datetime(2014, 6, 24, 0, 0), 'true', bytearray(b'Security'), None), ('Jazz', '13', 'FirstLieutenant', '8', '5000000', '1.8', ['Meister'], '33.670666,-117.841553', '1980/04/10', '2013/06/10', [3.962399959564209, 1800.0], '2013-06-24', datetime.datetime(2014, 6, 24, 0, 0), 'true', bytearray(b'First Lieutenant'), None), ('Megatron', None, 'None', '10', '5000000', '5.7', ['Megatron'], None, '1980/04/10', '2012/05/10', [None, 5700.0], '2012-05-10', datetime.datetime(2014, 6, 24, 0, 0), 'true', bytearray(b'None'), None), ('Metroplex_)^$', '300', 'BattleStation', '8', '5000000', None, ['Metroflex'], None, '1980/04/10', '2011/04/10', [91.44000244140625, None], '2011-04-10', datetime.datetime(2014, 6, 24, 0, 0), 'true', bytearray(b'Battle Station'), None), (None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None)])
assert (expected_df.collect() == actual_df.collect())
@staticmethod
def test_cols_rename():
actual_df =source_df.cols.rename('rank','rank(old)')
expected_df = op.create.df([('names', StringType(), True),('height(ft)', ShortType(), True),('function', StringType(), True),('rank(old)', ByteType(), True),('age', IntegerType(), True),('weight(t)', FloatType(), True),('japanese name', ArrayType(StringType(),True), True),('last position seen', StringType(), True),('date arrival', StringType(), True),('last date seen', StringType(), True),('attributes', ArrayType(FloatType(),True), True),('Date Type', DateType(), True),('timestamp', TimestampType(), True),('Cybertronian', BooleanType(), True),('function(binary)', BinaryType(), True),('NullType', NullType(), True)], [("Optim'us", -28, 'Leader', 10, 5000000, 4.300000190734863, ['Inochi', 'Convoy'], '19.442735,-99.201111', '1980/04/10', '2016/09/10', [8.53439998626709, 4300.0], datetime.date(2016, 9, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Leader'), None), ('bumbl#ebéé ', 17, 'Espionage', 7, 5000000, 2.0, ['Bumble', 'Goldback'], '10.642707,-71.612534', '1980/04/10', '2015/08/10', [5.334000110626221, 2000.0], datetime.date(2015, 8, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Espionage'), None), ('ironhide&', 26, 'Security', 7, 5000000, 4.0, ['Roadbuster'], '37.789563,-122.400356', '1980/04/10', '2014/07/10', [7.924799919128418, 4000.0], datetime.date(2014, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Security'), None), ('Jazz', 13, 'First Lieutenant', 8, 5000000, 1.7999999523162842, ['Meister'], '33.670666,-117.841553', '1980/04/10', '2013/06/10', [3.962399959564209, 1800.0], datetime.date(2013, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'First Lieutenant'), None), ('Megatron', None, 'None', 10, 5000000, 5.699999809265137, ['Megatron'], None, '1980/04/10', '2012/05/10', [None, 5700.0], datetime.date(2012, 5, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'None'), None), ('Metroplex_)^$', 300, 'Battle Station', 8, 5000000, None, ['Metroflex'], None, '1980/04/10', '2011/04/10', [91.44000244140625, None], datetime.date(2011, 4, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Battle Station'), None), (None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None)])
assert (expected_df.collect() == actual_df.collect())
@staticmethod
def test_cols_rename_function():
actual_df =source_df.cols.rename(str.upper)
expected_df = op.create.df([('NAMES', StringType(), True),('HEIGHT(FT)', ShortType(), True),('FUNCTION', StringType(), True),('RANK', ByteType(), True),('AGE', IntegerType(), True),('WEIGHT(T)', FloatType(), True),('JAPANESE NAME', ArrayType(StringType(),True), True),('LAST POSITION SEEN', StringType(), True),('DATE ARRIVAL', StringType(), True),('LAST DATE SEEN', StringType(), True),('ATTRIBUTES', ArrayType(FloatType(),True), True),('DATE TYPE', DateType(), True),('timestamp', TimestampType(), True),('CYBERTRONIAN', BooleanType(), True),('FUNCTION(BINARY)', BinaryType(), True),('NULLTYPE', NullType(), True)], [("Optim'us", -28, 'Leader', 10, 5000000, 4.300000190734863, ['Inochi', 'Convoy'], '19.442735,-99.201111', '1980/04/10', '2016/09/10', [8.53439998626709, 4300.0], datetime.date(2016, 9, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Leader'), None), ('bumbl#ebéé ', 17, 'Espionage', 7, 5000000, 2.0, ['Bumble', 'Goldback'], '10.642707,-71.612534', '1980/04/10', '2015/08/10', [5.334000110626221, 2000.0], datetime.date(2015, 8, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Espionage'), None), ('ironhide&', 26, 'Security', 7, 5000000, 4.0, ['Roadbuster'], '37.789563,-122.400356', '1980/04/10', '2014/07/10', [7.924799919128418, 4000.0], datetime.date(2014, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Security'), None), ('Jazz', 13, 'First Lieutenant', 8, 5000000, 1.7999999523162842, ['Meister'], '33.670666,-117.841553', '1980/04/10', '2013/06/10', [3.962399959564209, 1800.0], datetime.date(2013, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'First Lieutenant'), None), ('Megatron', None, 'None', 10, 5000000, 5.699999809265137, ['Megatron'], None, '1980/04/10', '2012/05/10', [None, 5700.0], datetime.date(2012, 5, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'None'), None), ('Metroplex_)^$', 300, 'Battle Station', 8, 5000000, None, ['Metroflex'], None, '1980/04/10', '2011/04/10', [91.44000244140625, None], datetime.date(2011, 4, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Battle Station'), None), (None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None)])
assert (expected_df.collect() == actual_df.collect())
@staticmethod
def test_cols_rename_list():
actual_df =source_df.cols.rename(['height(ft)','height(ft)(tons)','rank','rank(old)'])
expected_df = op.create.df([('names', StringType(), True),('height(ft)', ShortType(), True),('function', StringType(), True),('rank', ByteType(), True),('age', IntegerType(), True),('weight(t)', FloatType(), True),('japanese name', ArrayType(StringType(),True), True),('last position seen', StringType(), True),('date arrival', StringType(), True),('last date seen', StringType(), True),('attributes', ArrayType(FloatType(),True), True),('Date Type', DateType(), True),('timestamp', TimestampType(), True),('Cybertronian', BooleanType(), True),('function(binary)', BinaryType(), True),('NullType', NullType(), True)], [("Optim'us", -28, 'Leader', 10, 5000000, 4.300000190734863, ['Inochi', 'Convoy'], '19.442735,-99.201111', '1980/04/10', '2016/09/10', [8.53439998626709, 4300.0], datetime.date(2016, 9, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Leader'), None), ('bumbl#ebéé ', 17, 'Espionage', 7, 5000000, 2.0, ['Bumble', 'Goldback'], '10.642707,-71.612534', '1980/04/10', '2015/08/10', [5.334000110626221, 2000.0], datetime.date(2015, 8, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Espionage'), None), ('ironhide&', 26, 'Security', 7, 5000000, 4.0, ['Roadbuster'], '37.789563,-122.400356', '1980/04/10', '2014/07/10', [7.924799919128418, 4000.0], datetime.date(2014, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Security'), None), ('Jazz', 13, 'First Lieutenant', 8, 5000000, 1.7999999523162842, ['Meister'], '33.670666,-117.841553', '1980/04/10', '2013/06/10', [3.962399959564209, 1800.0], datetime.date(2013, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'First Lieutenant'), None), ('Megatron', None, 'None', 10, 5000000, 5.699999809265137, ['Megatron'], None, '1980/04/10', '2012/05/10', [None, 5700.0], datetime.date(2012, 5, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'None'), None), ('Metroplex_)^$', 300, 'Battle Station', 8, 5000000, None, ['Metroflex'], None, '1980/04/10', '2011/04/10', [91.44000244140625, None], datetime.date(2011, 4, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Battle Station'), None), (None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None)])
assert (expected_df.collect() == actual_df.collect())
@staticmethod
def test_cols_replace_all_columns():
actual_df =source_df.cols.replace('*',['Jazz','Leader'],'Match')
expected_df = op.create.df([('names', StringType(), True),('height(ft)', ShortType(), True),('function', StringType(), True),('rank', ByteType(), True),('age', IntegerType(), True),('weight(t)', FloatType(), True),('japanese name', ArrayType(StringType(),True), True),('last position seen', StringType(), True),('date arrival', StringType(), True),('last date seen', StringType(), True),('attributes', ArrayType(FloatType(),True), True),('Date Type', DateType(), True),('timestamp', TimestampType(), True),('Cybertronian', BooleanType(), True),('function(binary)', BinaryType(), True),('NullType', NullType(), True)], [("Optim'us", -28, 'Match', 10, 5000000, 4.300000190734863, ['Inochi', 'Convoy'], '19.442735,-99.201111', '1980/04/10', '2016/09/10', [8.53439998626709, 4300.0], datetime.date(2016, 9, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Leader'), None), ('bumbl#ebéé ', 17, 'Espionage', 7, 5000000, 2.0, ['Bumble', 'Goldback'], '10.642707,-71.612534', '1980/04/10', '2015/08/10', [5.334000110626221, 2000.0], datetime.date(2015, 8, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Espionage'), None), ('ironhide&', 26, 'Security', 7, 5000000, 4.0, ['Roadbuster'], '37.789563,-122.400356', '1980/04/10', '2014/07/10', [7.924799919128418, 4000.0], datetime.date(2014, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Security'), None), ('Match', 13, 'First Lieutenant', 8, 5000000, 1.7999999523162842, ['Meister'], '33.670666,-117.841553', '1980/04/10', '2013/06/10', [3.962399959564209, 1800.0], datetime.date(2013, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'First Lieutenant'), None), ('Megatron', None, 'None', 10, 5000000, 5.699999809265137, ['Megatron'], None, '1980/04/10', '2012/05/10', [None, 5700.0], datetime.date(2012, 5, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'None'), None), ('Metroplex_)^$', 300, 'Battle Station', 8, 5000000, None, ['Metroflex'], None, '1980/04/10', '2011/04/10', [91.44000244140625, None], datetime.date(2011, 4, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Battle Station'), None), (None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None)])
assert (expected_df.collect() == actual_df.collect())
@staticmethod
def test_cols_replace_chars():
actual_df =source_df.cols.replace('function',['F','E'],'Match',search_by='chars')
expected_df = op.create.df([('names', StringType(), True),('height(ft)', ShortType(), True),('function', StringType(), True),('rank', ByteType(), True),('age', IntegerType(), True),('weight(t)', FloatType(), True),('japanese name', ArrayType(StringType(),True), True),('last position seen', StringType(), True),('date arrival', StringType(), True),('last date seen', StringType(), True),('attributes', ArrayType(FloatType(),True), True),('Date Type', DateType(), True),('timestamp', TimestampType(), True),('Cybertronian', BooleanType(), True),('function(binary)', BinaryType(), True),('NullType', NullType(), True)], [("Optim'us", -28, 'Leader', 10, 5000000, 4.300000190734863, ['Inochi', 'Convoy'], '19.442735,-99.201111', '1980/04/10', '2016/09/10', [8.53439998626709, 4300.0], datetime.date(2016, 9, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Leader'), None), ('bumbl#ebéé ', 17, 'Matchspionage', 7, 5000000, 2.0, ['Bumble', 'Goldback'], '10.642707,-71.612534', '1980/04/10', '2015/08/10', [5.334000110626221, 2000.0], datetime.date(2015, 8, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Espionage'), None), ('ironhide&', 26, 'Security', 7, 5000000, 4.0, ['Roadbuster'], '37.789563,-122.400356', '1980/04/10', '2014/07/10', [7.924799919128418, 4000.0], datetime.date(2014, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Security'), None), ('Jazz', 13, 'Matchirst Lieutenant', 8, 5000000, 1.7999999523162842, ['Meister'], '33.670666,-117.841553', '1980/04/10', '2013/06/10', [3.962399959564209, 1800.0], datetime.date(2013, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'First Lieutenant'), None), ('Megatron', None, 'None', 10, 5000000, 5.699999809265137, ['Megatron'], None, '1980/04/10', '2012/05/10', [None, 5700.0], datetime.date(2012, 5, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'None'), None), ('Metroplex_)^$', 300, 'Battle Station', 8, 5000000, None, ['Metroflex'], None, '1980/04/10', '2011/04/10', [91.44000244140625, None], datetime.date(2011, 4, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Battle Station'), None), (None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None)])
assert (expected_df.collect() == actual_df.collect())
@staticmethod
def test_cols_replace_full():
actual_df =source_df.cols.replace('function',['First Lieutenant','Battle'],'Match',search_by='full')
expected_df = op.create.df([('names', StringType(), True),('height(ft)', ShortType(), True),('function', StringType(), True),('rank', ByteType(), True),('age', IntegerType(), True),('weight(t)', FloatType(), True),('japanese name', ArrayType(StringType(),True), True),('last position seen', StringType(), True),('date arrival', StringType(), True),('last date seen', StringType(), True),('attributes', ArrayType(FloatType(),True), True),('Date Type', DateType(), True),('timestamp', TimestampType(), True),('Cybertronian', BooleanType(), True),('function(binary)', BinaryType(), True),('NullType', NullType(), True)], [("Optim'us", -28, 'Leader', 10, 5000000, 4.300000190734863, ['Inochi', 'Convoy'], '19.442735,-99.201111', '1980/04/10', '2016/09/10', [8.53439998626709, 4300.0], datetime.date(2016, 9, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Leader'), None), ('bumbl#ebéé ', 17, 'Espionage', 7, 5000000, 2.0, ['Bumble', 'Goldback'], '10.642707,-71.612534', '1980/04/10', '2015/08/10', [5.334000110626221, 2000.0], datetime.date(2015, 8, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Espionage'), None), ('ironhide&', 26, 'Security', 7, 5000000, 4.0, ['Roadbuster'], '37.789563,-122.400356', '1980/04/10', '2014/07/10', [7.924799919128418, 4000.0], datetime.date(2014, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Security'), None), ('Jazz', 13, 'Match', 8, 5000000, 1.7999999523162842, ['Meister'], '33.670666,-117.841553', '1980/04/10', '2013/06/10', [3.962399959564209, 1800.0], datetime.date(2013, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'First Lieutenant'), None), ('Megatron', None, 'None', 10, 5000000, 5.699999809265137, ['Megatron'], None, '1980/04/10', '2012/05/10', [None, 5700.0], datetime.date(2012, 5, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'None'), None), ('Metroplex_)^$', 300, 'Battle Station', 8, 5000000, None, ['Metroflex'], None, '1980/04/10', '2011/04/10', [91.44000244140625, None], datetime.date(2011, 4, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Battle Station'), None), (None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None)])
assert (expected_df.collect() == actual_df.collect())
@staticmethod
def test_cols_replace_numeric():
actual_df =source_df.cols.replace('age',5000000,5,search_by='numeric')
expected_df = op.create.df([('names', StringType(), True),('height(ft)', ShortType(), True),('function', StringType(), True),('rank', ByteType(), True),('age', IntegerType(), True),('weight(t)', FloatType(), True),('japanese name', ArrayType(StringType(),True), True),('last position seen', StringType(), True),('date arrival', StringType(), True),('last date seen', StringType(), True),('attributes', ArrayType(FloatType(),True), True),('Date Type', DateType(), True),('timestamp', TimestampType(), True),('Cybertronian', BooleanType(), True),('function(binary)', BinaryType(), True),('NullType', NullType(), True)], [("Optim'us", -28, 'Leader', 10, 5, 4.300000190734863, ['Inochi', 'Convoy'], '19.442735,-99.201111', '1980/04/10', '2016/09/10', [8.53439998626709, 4300.0], datetime.date(2016, 9, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Leader'), None), ('bumbl#ebéé ', 17, 'Espionage', 7, 5, 2.0, ['Bumble', 'Goldback'], '10.642707,-71.612534', '1980/04/10', '2015/08/10', [5.334000110626221, 2000.0], datetime.date(2015, 8, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Espionage'), None), ('ironhide&', 26, 'Security', 7, 5, 4.0, ['Roadbuster'], '37.789563,-122.400356', '1980/04/10', '2014/07/10', [7.924799919128418, 4000.0], datetime.date(2014, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Security'), None), ('Jazz', 13, 'First Lieutenant', 8, 5, 1.7999999523162842, ['Meister'], '33.670666,-117.841553', '1980/04/10', '2013/06/10', [3.962399959564209, 1800.0], datetime.date(2013, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'First Lieutenant'), None), ('Megatron', None, 'None', 10, 5, 5.699999809265137, ['Megatron'], None, '1980/04/10', '2012/05/10', [None, 5700.0], datetime.date(2012, 5, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'None'), None), ('Metroplex_)^$', 300, 'Battle Station', 8, 5, None, ['Metroflex'], None, '1980/04/10', '2011/04/10', [91.44000244140625, None], datetime.date(2011, 4, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Battle Station'), None), (None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None)])
assert (expected_df.collect() == actual_df.collect())
@staticmethod
def test_cols_replace_words():
actual_df =source_df.cols.replace('function',['Security','Leader'],'Match',search_by='words')
expected_df = op.create.df([('names', StringType(), True),('height(ft)', ShortType(), True),('function', StringType(), True),('rank', ByteType(), True),('age', IntegerType(), True),('weight(t)', FloatType(), True),('japanese name', ArrayType(StringType(),True), True),('last position seen', StringType(), True),('date arrival', StringType(), True),('last date seen', StringType(), True),('attributes', ArrayType(FloatType(),True), True),('Date Type', DateType(), True),('timestamp', TimestampType(), True),('Cybertronian', BooleanType(), True),('function(binary)', BinaryType(), True),('NullType', NullType(), True)], [("Optim'us", -28, 'Match', 10, 5000000, 4.300000190734863, ['Inochi', 'Convoy'], '19.442735,-99.201111', '1980/04/10', '2016/09/10', [8.53439998626709, 4300.0], datetime.date(2016, 9, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Leader'), None), ('bumbl#ebéé ', 17, 'Espionage', 7, 5000000, 2.0, ['Bumble', 'Goldback'], '10.642707,-71.612534', '1980/04/10', '2015/08/10', [5.334000110626221, 2000.0], datetime.date(2015, 8, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Espionage'), None), ('ironhide&', 26, 'Match', 7, 5000000, 4.0, ['Roadbuster'], '37.789563,-122.400356', '1980/04/10', '2014/07/10', [7.924799919128418, 4000.0], datetime.date(2014, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Security'), None), ('Jazz', 13, 'First Lieutenant', 8, 5000000, 1.7999999523162842, ['Meister'], '33.670666,-117.841553', '1980/04/10', '2013/06/10', [3.962399959564209, 1800.0], datetime.date(2013, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'First Lieutenant'), None), ('Megatron', None, 'None', 10, 5000000, 5.699999809265137, ['Megatron'], None, '1980/04/10', '2012/05/10', [None, 5700.0], datetime.date(2012, 5, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'None'), None), ('Metroplex_)^$', 300, 'Battle Station', 8, 5000000, None, ['Metroflex'], None, '1980/04/10', '2011/04/10', [91.44000244140625, None], datetime.date(2011, 4, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Battle Station'), None), (None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None)])
assert (expected_df.collect() == actual_df.collect())
@staticmethod
def test_cols_reverse():
actual_df =source_df.cols.reverse('function')
expected_df = op.create.df([('names', StringType(), True),('height(ft)', ShortType(), True),('function', StringType(), True),('rank', ByteType(), True),('age', IntegerType(), True),('weight(t)', FloatType(), True),('japanese name', ArrayType(StringType(),True), True),('last position seen', StringType(), True),('date arrival', StringType(), True),('last date seen', StringType(), True),('attributes', ArrayType(FloatType(),True), True),('Date Type', DateType(), True),('timestamp', TimestampType(), True),('Cybertronian', BooleanType(), True),('function(binary)', BinaryType(), True),('NullType', NullType(), True)], [("Optim'us", -28, 'redaeL', 10, 5000000, 4.300000190734863, ['Inochi', 'Convoy'], '19.442735,-99.201111', '1980/04/10', '2016/09/10', [8.53439998626709, 4300.0], datetime.date(2016, 9, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Leader'), None), ('bumbl#ebéé ', 17, 'eganoipsE', 7, 5000000, 2.0, ['Bumble', 'Goldback'], '10.642707,-71.612534', '1980/04/10', '2015/08/10', [5.334000110626221, 2000.0], datetime.date(2015, 8, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Espionage'), None), ('ironhide&', 26, 'ytiruceS', 7, 5000000, 4.0, ['Roadbuster'], '37.789563,-122.400356', '1980/04/10', '2014/07/10', [7.924799919128418, 4000.0], datetime.date(2014, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Security'), None), ('Jazz', 13, 'tnanetueiL tsriF', 8, 5000000, 1.7999999523162842, ['Meister'], '33.670666,-117.841553', '1980/04/10', '2013/06/10', [3.962399959564209, 1800.0], datetime.date(2013, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'First Lieutenant'), None), ('Megatron', None, 'enoN', 10, 5000000, 5.699999809265137, ['Megatron'], None, '1980/04/10', '2012/05/10', [None, 5700.0], datetime.date(2012, 5, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'None'), None), ('Metroplex_)^$', 300, 'noitatS elttaB', 8, 5000000, None, ['Metroflex'], None, '1980/04/10', '2011/04/10', [91.44000244140625, None], datetime.date(2011, 4, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Battle Station'), None), (None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None)])
assert (expected_df.collect() == actual_df.collect())
@staticmethod
def test_cols_reverse_all_columns():
actual_df =source_df.cols.reverse('*')
expected_df = op.create.df([('names', StringType(), True),('height(ft)', ShortType(), True),('function', StringType(), True),('rank', ByteType(), True),('age', IntegerType(), True),('weight(t)', FloatType(), True),('japanese name', ArrayType(StringType(),True), True),('last position seen', StringType(), True),('date arrival', StringType(), True),('last date seen', StringType(), True),('attributes', ArrayType(FloatType(),True), True),('Date Type', DateType(), True),('timestamp', TimestampType(), True),('Cybertronian', BooleanType(), True),('function(binary)', BinaryType(), True),('NullType', NullType(), True)], [("su'mitpO", -28, 'redaeL', 10, 5000000, 4.300000190734863, ['Inochi', 'Convoy'], '111102.99-,537244.91', '01/40/0891', '01/90/6102', [8.53439998626709, 4300.0], datetime.date(2016, 9, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Leader'), None), (' éébe#lbmub', 17, 'eganoipsE', 7, 5000000, 2.0, ['Bumble', 'Goldback'], '435216.17-,707246.01', '01/40/0891', '01/80/5102', [5.334000110626221, 2000.0], datetime.date(2015, 8, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Espionage'), None), ('&edihnori', 26, 'ytiruceS', 7, 5000000, 4.0, ['Roadbuster'], '653004.221-,365987.73', '01/40/0891', '01/70/4102', [7.924799919128418, 4000.0], datetime.date(2014, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Security'), None), ('zzaJ', 13, 'tnanetueiL tsriF', 8, 5000000, 1.7999999523162842, ['Meister'], '355148.711-,666076.33', '01/40/0891', '01/60/3102', [3.962399959564209, 1800.0], datetime.date(2013, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'First Lieutenant'), None), ('nortageM', None, 'enoN', 10, 5000000, 5.699999809265137, ['Megatron'], None, '01/40/0891', '01/50/2102', [None, 5700.0], datetime.date(2012, 5, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'None'), None), ('$^)_xelporteM', 300, 'noitatS elttaB', 8, 5000000, None, ['Metroflex'], None, '01/40/0891', '01/40/1102', [91.44000244140625, None], datetime.date(2011, 4, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Battle Station'), None), (None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None)])
assert (expected_df.collect() == actual_df.collect())
@staticmethod
def test_cols_schema_dtype():
actual_df =source_df.cols.schema_dtype('rank')
actual_df =json_enconding(actual_df)
expected_value =json_enconding(ByteType)
assert (expected_value == actual_df)
@staticmethod
def test_cols_select():
actual_df =source_df.cols.select(0,'height(ft)')
expected_df = op.create.df([('names', StringType(), True)], [("Optim'us",), ('bumbl#ebéé ',), ('ironhide&',), ('Jazz',), ('Megatron',), ('Metroplex_)^$',), (None,)])
assert (expected_df.collect() == actual_df.collect())
@staticmethod
def test_cols_select_by_dtypes_array():
actual_df =source_df.cols.select_by_dtypes('array')
expected_df = op.create.df([('japanese name', ArrayType(StringType(),True), True),('attributes', ArrayType(FloatType(),True), True)], [(['Inochi', 'Convoy'], [8.53439998626709, 4300.0]), (['Bumble', 'Goldback'], [5.334000110626221, 2000.0]), (['Roadbuster'], [7.924799919128418, 4000.0]), (['Meister'], [3.962399959564209, 1800.0]), (['Megatron'], [None, 5700.0]), (['Metroflex'], [91.44000244140625, None]), (None, None)])
assert (expected_df.collect() == actual_df.collect())
@staticmethod
def test_cols_select_by_dtypes_float():
actual_df =source_df.cols.select_by_dtypes('float')
expected_df = op.create.df([('weight(t)', FloatType(), True)], [(4.300000190734863,), (2.0,), (4.0,), (1.7999999523162842,), (5.699999809265137,), (None,), (None,)])
assert (expected_df.collect() == actual_df.collect())
@staticmethod
def test_cols_select_by_dtypes_int():
actual_df =source_df.cols.select_by_dtypes('int')
expected_df = op.create.df([('age', IntegerType(), True)], [(5000000,), (5000000,), (5000000,), (5000000,), (5000000,), (5000000,), (None,)])
assert (expected_df.collect() == actual_df.collect())
@staticmethod
def test_cols_select_by_dtypes_str():
actual_df =source_df.cols.select_by_dtypes('str')
expected_df = op.create.df([('names', StringType(), True),('function', StringType(), True),('last position seen', StringType(), True),('date arrival', StringType(), True),('last date seen', StringType(), True)], [("Optim'us", 'Leader', '19.442735,-99.201111', '1980/04/10', '2016/09/10'), ('bumbl#ebéé ', 'Espionage', '10.642707,-71.612534', '1980/04/10', '2015/08/10'), ('ironhide&', 'Security', '37.789563,-122.400356', '1980/04/10', '2014/07/10'), ('Jazz', 'First Lieutenant', '33.670666,-117.841553', '1980/04/10', '2013/06/10'), ('Megatron', 'None', None, '1980/04/10', '2012/05/10'), ('Metroplex_)^$', 'Battle Station', None, '1980/04/10', '2011/04/10'), (None, None, None, None, None)])
assert (expected_df.collect() == actual_df.collect())
@staticmethod
def test_cols_select_regex():
actual_df =source_df.cols.select('n.*',regex=True)
expected_df = op.create.df([('names', StringType(), True)], [("Optim'us",), ('bumbl#ebéé ',), ('ironhide&',), ('Jazz',), ('Megatron',), ('Metroplex_)^$',), (None,)])
assert (expected_df.collect() == actual_df.collect())
@staticmethod
def test_cols_skewness():
actual_df =source_df.cols.skewness('height(ft)')
actual_df =json_enconding(actual_df)
expected_value =json_enconding(1.4049)
assert (expected_value == actual_df)
@staticmethod
def test_cols_skewness_all_columns():
actual_df =source_df.cols.skewness('*')
actual_df =json_enconding(actual_df)
expected_value =json_enconding({'height(ft)': 1.4049, 'rank': 0.3818, 'age': nan, 'weight(t)': 0.06521})
assert (expected_value == actual_df)
@staticmethod
def test_cols_sort():
actual_df =source_df.cols.sort()
expected_df = op.create.df([('age', IntegerType(), True),('attributes', ArrayType(FloatType(),True), True),('Cybertronian', BooleanType(), True),('date arrival', StringType(), True),('Date Type', DateType(), True),('function', StringType(), True),('function(binary)', BinaryType(), True),('height(ft)', ShortType(), True),('japanese name', ArrayType(StringType(),True), True),('last date seen', StringType(), True),('last position seen', StringType(), True),('names', StringType(), True),('NullType', NullType(), True),('rank', ByteType(), True),('timestamp', TimestampType(), True),('weight(t)', FloatType(), True)], [(5000000, [8.53439998626709, 4300.0], True, '1980/04/10', datetime.date(2016, 9, 10), 'Leader', bytearray(b'Leader'), -28, ['Inochi', 'Convoy'], '2016/09/10', '19.442735,-99.201111', "Optim'us", None, 10, datetime.datetime(2014, 6, 24, 0, 0), 4.300000190734863), (5000000, [5.334000110626221, 2000.0], True, '1980/04/10', datetime.date(2015, 8, 10), 'Espionage', bytearray(b'Espionage'), 17, ['Bumble', 'Goldback'], '2015/08/10', '10.642707,-71.612534', 'bumbl#ebéé ', None, 7, datetime.datetime(2014, 6, 24, 0, 0), 2.0), (5000000, [7.924799919128418, 4000.0], True, '1980/04/10', datetime.date(2014, 6, 24), 'Security', bytearray(b'Security'), 26, ['Roadbuster'], '2014/07/10', '37.789563,-122.400356', 'ironhide&', None, 7, datetime.datetime(2014, 6, 24, 0, 0), 4.0), (5000000, [3.962399959564209, 1800.0], True, '1980/04/10', datetime.date(2013, 6, 24), 'First Lieutenant', bytearray(b'First Lieutenant'), 13, ['Meister'], '2013/06/10', '33.670666,-117.841553', 'Jazz', None, 8, datetime.datetime(2014, 6, 24, 0, 0), 1.7999999523162842), (5000000, [None, 5700.0], True, '1980/04/10', datetime.date(2012, 5, 10), 'None', bytearray(b'None'), None, ['Megatron'], '2012/05/10', None, 'Megatron', None, 10, datetime.datetime(2014, 6, 24, 0, 0), 5.699999809265137), (5000000, [91.44000244140625, None], True, '1980/04/10', datetime.date(2011, 4, 10), 'Battle Station', bytearray(b'Battle Station'), 300, ['Metroflex'], '2011/04/10', None, 'Metroplex_)^$', None, 8, datetime.datetime(2014, 6, 24, 0, 0), None), (None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None)])
assert (expected_df.collect() == actual_df.collect())
@staticmethod
def test_cols_sort_asc():
actual_df =source_df.cols.sort('asc')
expected_df = op.create.df([('age', IntegerType(), True),('attributes', ArrayType(FloatType(),True), True),('Cybertronian', BooleanType(), True),('date arrival', StringType(), True),('Date Type', DateType(), True),('function', StringType(), True),('function(binary)', BinaryType(), True),('height(ft)', ShortType(), True),('japanese name', ArrayType(StringType(),True), True),('last date seen', StringType(), True),('last position seen', StringType(), True),('names', StringType(), True),('NullType', NullType(), True),('rank', ByteType(), True),('timestamp', TimestampType(), True),('weight(t)', FloatType(), True)], [(5000000, [8.53439998626709, 4300.0], True, '1980/04/10', datetime.date(2016, 9, 10), 'Leader', bytearray(b'Leader'), -28, ['Inochi', 'Convoy'], '2016/09/10', '19.442735,-99.201111', "Optim'us", None, 10, datetime.datetime(2014, 6, 24, 0, 0), 4.300000190734863), (5000000, [5.334000110626221, 2000.0], True, '1980/04/10', datetime.date(2015, 8, 10), 'Espionage', bytearray(b'Espionage'), 17, ['Bumble', 'Goldback'], '2015/08/10', '10.642707,-71.612534', 'bumbl#ebéé ', None, 7, datetime.datetime(2014, 6, 24, 0, 0), 2.0), (5000000, [7.924799919128418, 4000.0], True, '1980/04/10', datetime.date(2014, 6, 24), 'Security', bytearray(b'Security'), 26, ['Roadbuster'], '2014/07/10', '37.789563,-122.400356', 'ironhide&', None, 7, datetime.datetime(2014, 6, 24, 0, 0), 4.0), (5000000, [3.962399959564209, 1800.0], True, '1980/04/10', datetime.date(2013, 6, 24), 'First Lieutenant', bytearray(b'First Lieutenant'), 13, ['Meister'], '2013/06/10', '33.670666,-117.841553', 'Jazz', None, 8, datetime.datetime(2014, 6, 24, 0, 0), 1.7999999523162842), (5000000, [None, 5700.0], True, '1980/04/10', datetime.date(2012, 5, 10), 'None', bytearray(b'None'), None, ['Megatron'], '2012/05/10', None, 'Megatron', None, 10, datetime.datetime(2014, 6, 24, 0, 0), 5.699999809265137), (5000000, [91.44000244140625, None], True, '1980/04/10', datetime.date(2011, 4, 10), 'Battle Station', bytearray(b'Battle Station'), 300, ['Metroflex'], '2011/04/10', None, 'Metroplex_)^$', None, 8, datetime.datetime(2014, 6, 24, 0, 0), None), (None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None)])
assert (expected_df.collect() == actual_df.collect())
@staticmethod
def test_cols_sort_desc():
actual_df =source_df.cols.sort('desc')
expected_df = op.create.df([('weight(t)', FloatType(), True),('timestamp', TimestampType(), True),('rank', ByteType(), True),('NullType', NullType(), True),('names', StringType(), True),('last position seen', StringType(), True),('last date seen', StringType(), True),('japanese name', ArrayType(StringType(),True), True),('height(ft)', ShortType(), True),('function(binary)', BinaryType(), True),('function', StringType(), True),('Date Type', DateType(), True),('date arrival', StringType(), True),('Cybertronian', BooleanType(), True),('attributes', ArrayType(FloatType(),True), True),('age', IntegerType(), True)], [(4.300000190734863, datetime.datetime(2014, 6, 24, 0, 0), 10, None, "Optim'us", '19.442735,-99.201111', '2016/09/10', ['Inochi', 'Convoy'], -28, bytearray(b'Leader'), 'Leader', datetime.date(2016, 9, 10), '1980/04/10', True, [8.53439998626709, 4300.0], 5000000), (2.0, datetime.datetime(2014, 6, 24, 0, 0), 7, None, 'bumbl#ebéé ', '10.642707,-71.612534', '2015/08/10', ['Bumble', 'Goldback'], 17, bytearray(b'Espionage'), 'Espionage', datetime.date(2015, 8, 10), '1980/04/10', True, [5.334000110626221, 2000.0], 5000000), (4.0, datetime.datetime(2014, 6, 24, 0, 0), 7, None, 'ironhide&', '37.789563,-122.400356', '2014/07/10', ['Roadbuster'], 26, bytearray(b'Security'), 'Security', datetime.date(2014, 6, 24), '1980/04/10', True, [7.924799919128418, 4000.0], 5000000), (1.7999999523162842, datetime.datetime(2014, 6, 24, 0, 0), 8, None, 'Jazz', '33.670666,-117.841553', '2013/06/10', ['Meister'], 13, bytearray(b'First Lieutenant'), 'First Lieutenant', datetime.date(2013, 6, 24), '1980/04/10', True, [3.962399959564209, 1800.0], 5000000), (5.699999809265137, datetime.datetime(2014, 6, 24, 0, 0), 10, None, 'Megatron', None, '2012/05/10', ['Megatron'], None, bytearray(b'None'), 'None', datetime.date(2012, 5, 10), '1980/04/10', True, [None, 5700.0], 5000000), (None, datetime.datetime(2014, 6, 24, 0, 0), 8, None, 'Metroplex_)^$', None, '2011/04/10', ['Metroflex'], 300, bytearray(b'Battle Station'), 'Battle Station', datetime.date(2011, 4, 10), '1980/04/10', True, [91.44000244140625, None], 5000000), (None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None)])
assert (expected_df.collect() == actual_df.collect())
@staticmethod
def test_cols_std():
actual_df =source_df.cols.std('height(ft)')
actual_df =json_enconding(actual_df)
expected_value =json_enconding(132.66612)
assert (expected_value == actual_df)
@staticmethod
def test_cols_std_all_columns():
actual_df =source_df.cols.std('*')
actual_df =json_enconding(actual_df)
expected_value =json_enconding({'height(ft)': 132.66612, 'rank': 1.36626, 'age': 0.0, 'weight(t)': 1.64712})
assert (expected_value == actual_df)
@staticmethod
def test_cols_string_to_index():
source_df=op.create.df([('names', StringType(), True),('height(ft)', ShortType(), True),('function', StringType(), True),('rank', ByteType(), True),('age', IntegerType(), True),('weight(t)', FloatType(), True),('japanese name', ArrayType(StringType(),True), True),('last position seen', StringType(), True),('date arrival', StringType(), True),('last date seen', StringType(), True),('attributes', ArrayType(FloatType(),True), True),('Date Type', DateType(), True),('timestamp', TimestampType(), True),('Cybertronian', BooleanType(), True)], [("Optim'us", -28, 'Leader', 10, 5000000, 4.300000190734863, ['Inochi', 'Convoy'], '19.442735,-99.201111', '1980/04/10', '2016/09/10', [8.53439998626709, 4300.0], datetime.date(2016, 9, 10), datetime.datetime(2014, 6, 24, 0, 0), True), ('bumbl#ebéé ', 17, 'Espionage', 7, 5000000, 2.0, ['Bumble', 'Goldback'], '10.642707,-71.612534', '1980/04/10', '2015/08/10', [5.334000110626221, 2000.0], datetime.date(2015, 8, 10), datetime.datetime(2014, 6, 24, 0, 0), True), ('ironhide&', 26, 'Security', 7, 5000000, 4.0, ['Roadbuster'], '37.789563,-122.400356', '1980/04/10', '2014/07/10', [7.924799919128418, 4000.0], datetime.date(2014, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True), ('Jazz', 13, 'First Lieutenant', 8, 5000000, 1.7999999523162842, ['Meister'], '33.670666,-117.841553', '1980/04/10', '2013/06/10', [3.962399959564209, 1800.0], datetime.date(2013, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True), ('Megatron', None, 'None', 10, 5000000, 5.699999809265137, ['Megatron'], None, '1980/04/10', '2012/05/10', [None, 5700.0], datetime.date(2012, 5, 10), datetime.datetime(2014, 6, 24, 0, 0), True), ('Metroplex_)^$', 300, 'Battle Station', 8, 5000000, None, ['Metroflex'], None, '1980/04/10', '2011/04/10', [91.44000244140625, None], datetime.date(2011, 4, 10), datetime.datetime(2014, 6, 24, 0, 0), True)])
actual_df =source_df.cols.string_to_index('rank')
expected_df = op.create.df([('names', StringType(), True),('height(ft)', ShortType(), True),('function', StringType(), True),('rank', ByteType(), True),('age', IntegerType(), True),('weight(t)', FloatType(), True),('japanese name', ArrayType(StringType(),True), True),('last position seen', StringType(), True),('date arrival', StringType(), True),('last date seen', StringType(), True),('attributes', ArrayType(FloatType(),True), True),('Date Type', DateType(), True),('timestamp', TimestampType(), True),('Cybertronian', BooleanType(), True),('rank***STRING_TO_INDEX', DoubleType(), False)], [("Optim'us", -28, 'Leader', 10, 5000000, 4.300000190734863, ['Inochi', 'Convoy'], '19.442735,-99.201111', '1980/04/10', '2016/09/10', [8.53439998626709, 4300.0], datetime.date(2016, 9, 10), datetime.datetime(2014, 6, 24, 0, 0), True, 2.0), ('bumbl#ebéé ', 17, 'Espionage', 7, 5000000, 2.0, ['Bumble', 'Goldback'], '10.642707,-71.612534', '1980/04/10', '2015/08/10', [5.334000110626221, 2000.0], datetime.date(2015, 8, 10), datetime.datetime(2014, 6, 24, 0, 0), True, 1.0), ('ironhide&', 26, 'Security', 7, 5000000, 4.0, ['Roadbuster'], '37.789563,-122.400356', '1980/04/10', '2014/07/10', [7.924799919128418, 4000.0], datetime.date(2014, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True, 1.0), ('Jazz', 13, 'First Lieutenant', 8, 5000000, 1.7999999523162842, ['Meister'], '33.670666,-117.841553', '1980/04/10', '2013/06/10', [3.962399959564209, 1800.0], datetime.date(2013, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True, 0.0), ('Megatron', None, 'None', 10, 5000000, 5.699999809265137, ['Megatron'], None, '1980/04/10', '2012/05/10', [None, 5700.0], datetime.date(2012, 5, 10), datetime.datetime(2014, 6, 24, 0, 0), True, 2.0), ('Metroplex_)^$', 300, 'Battle Station', 8, 5000000, None, ['Metroflex'], None, '1980/04/10', '2011/04/10', [91.44000244140625, None], datetime.date(2011, 4, 10), datetime.datetime(2014, 6, 24, 0, 0), True, 0.0)])
assert (expected_df.collect() == actual_df.collect())
@staticmethod
def test_cols_sub():
actual_df =source_df.cols.sub(['height(ft)','rank'])
expected_df = op.create.df([('names', StringType(), True),('height(ft)', FloatType(), True),('function', StringType(), True),('rank', FloatType(), True),('age', IntegerType(), True),('weight(t)', FloatType(), True),('japanese name', ArrayType(StringType(),True), True),('last position seen', StringType(), True),('date arrival', StringType(), True),('last date seen', StringType(), True),('attributes', ArrayType(FloatType(),True), True),('Date Type', DateType(), True),('timestamp', TimestampType(), True),('Cybertronian', BooleanType(), True),('function(binary)', BinaryType(), True),('NullType', NullType(), True),('sub', FloatType(), True)], [("Optim'us", -28.0, 'Leader', 10.0, 5000000, 4.300000190734863, ['Inochi', 'Convoy'], '19.442735,-99.201111', '1980/04/10', '2016/09/10', [8.53439998626709, 4300.0], datetime.date(2016, 9, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Leader'), None, -38.0), ('bumbl#ebéé ', 17.0, 'Espionage', 7.0, 5000000, 2.0, ['Bumble', 'Goldback'], '10.642707,-71.612534', '1980/04/10', '2015/08/10', [5.334000110626221, 2000.0], datetime.date(2015, 8, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Espionage'), None, 10.0), ('ironhide&', 26.0, 'Security', 7.0, 5000000, 4.0, ['Roadbuster'], '37.789563,-122.400356', '1980/04/10', '2014/07/10', [7.924799919128418, 4000.0], datetime.date(2014, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Security'), None, 19.0), ('Jazz', 13.0, 'First Lieutenant', 8.0, 5000000, 1.7999999523162842, ['Meister'], '33.670666,-117.841553', '1980/04/10', '2013/06/10', [3.962399959564209, 1800.0], datetime.date(2013, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'First Lieutenant'), None, 5.0), ('Megatron', None, 'None', 10.0, 5000000, 5.699999809265137, ['Megatron'], None, '1980/04/10', '2012/05/10', [None, 5700.0], datetime.date(2012, 5, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'None'), None, None), ('Metroplex_)^$', 300.0, 'Battle Station', 8.0, 5000000, None, ['Metroflex'], None, '1980/04/10', '2011/04/10', [91.44000244140625, None], datetime.date(2011, 4, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Battle Station'), None, 292.0), (None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None)])
assert (expected_df.collect() == actual_df.collect())
@staticmethod
def test_cols_sub_all_columns():
actual_df =source_df.cols.sub('*')
expected_df = op.create.df([('names', StringType(), True),('height(ft)', FloatType(), True),('function', StringType(), True),('rank', FloatType(), True),('age', FloatType(), True),('weight(t)', FloatType(), True),('japanese name', ArrayType(StringType(),True), True),('last position seen', StringType(), True),('date arrival', StringType(), True),('last date seen', StringType(), True),('attributes', ArrayType(FloatType(),True), True),('Date Type', DateType(), True),('timestamp', TimestampType(), True),('Cybertronian', BooleanType(), True),('function(binary)', BinaryType(), True),('NullType', NullType(), True),('sub', FloatType(), True)], [("Optim'us", -28.0, 'Leader', 10.0, 5000000.0, 4.300000190734863, ['Inochi', 'Convoy'], '19.442735,-99.201111', '1980/04/10', '2016/09/10', [8.53439998626709, 4300.0], datetime.date(2016, 9, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Leader'), None, -5000042.5), ('bumbl#ebéé ', 17.0, 'Espionage', 7.0, 5000000.0, 2.0, ['Bumble', 'Goldback'], '10.642707,-71.612534', '1980/04/10', '2015/08/10', [5.334000110626221, 2000.0], datetime.date(2015, 8, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Espionage'), None, -4999992.0), ('ironhide&', 26.0, 'Security', 7.0, 5000000.0, 4.0, ['Roadbuster'], '37.789563,-122.400356', '1980/04/10', '2014/07/10', [7.924799919128418, 4000.0], datetime.date(2014, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Security'), None, -4999985.0), ('Jazz', 13.0, 'First Lieutenant', 8.0, 5000000.0, 1.7999999523162842, ['Meister'], '33.670666,-117.841553', '1980/04/10', '2013/06/10', [3.962399959564209, 1800.0], datetime.date(2013, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'First Lieutenant'), None, -4999997.0), ('Megatron', None, 'None', 10.0, 5000000.0, 5.699999809265137, ['Megatron'], None, '1980/04/10', '2012/05/10', [None, 5700.0], datetime.date(2012, 5, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'None'), None, None), ('Metroplex_)^$', 300.0, 'Battle Station', 8.0, 5000000.0, None, ['Metroflex'], None, '1980/04/10', '2011/04/10', [91.44000244140625, None], datetime.date(2011, 4, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Battle Station'), None, None), (None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None)])
assert (expected_df.collect() == actual_df.collect())
@staticmethod
def test_cols_sum():
actual_df =source_df.cols.sum('height(ft)')
actual_df =json_enconding(actual_df)
expected_value =json_enconding(328)
assert (expected_value == actual_df)
@staticmethod
def test_cols_sum_all_columns():
actual_df =source_df.cols.sum('*')
actual_df =json_enconding(actual_df)
expected_value =json_enconding({'height(ft)': 328, 'rank': 50, 'age': 30000000, 'weight(t)': 17.8})
assert (expected_value == actual_df)
@staticmethod
def test_cols_trim():
actual_df =source_df.cols.trim('height(ft)')
expected_df = op.create.df([('names', StringType(), True),('height(ft)', StringType(), True),('function', StringType(), True),('rank', ByteType(), True),('age', IntegerType(), True),('weight(t)', FloatType(), True),('japanese name', ArrayType(StringType(),True), True),('last position seen', StringType(), True),('date arrival', StringType(), True),('last date seen', StringType(), True),('attributes', ArrayType(FloatType(),True), True),('Date Type', DateType(), True),('timestamp', TimestampType(), True),('Cybertronian', BooleanType(), True),('function(binary)', BinaryType(), True),('NullType', NullType(), True)], [("Optim'us", '-28', 'Leader', 10, 5000000, 4.300000190734863, ['Inochi', 'Convoy'], '19.442735,-99.201111', '1980/04/10', '2016/09/10', [8.53439998626709, 4300.0], datetime.date(2016, 9, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Leader'), None), ('bumbl#ebéé ', '17', 'Espionage', 7, 5000000, 2.0, ['Bumble', 'Goldback'], '10.642707,-71.612534', '1980/04/10', '2015/08/10', [5.334000110626221, 2000.0], datetime.date(2015, 8, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Espionage'), None), ('ironhide&', '26', 'Security', 7, 5000000, 4.0, ['Roadbuster'], '37.789563,-122.400356', '1980/04/10', '2014/07/10', [7.924799919128418, 4000.0], datetime.date(2014, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Security'), None), ('Jazz', '13', 'First Lieutenant', 8, 5000000, 1.7999999523162842, ['Meister'], '33.670666,-117.841553', '1980/04/10', '2013/06/10', [3.962399959564209, 1800.0], datetime.date(2013, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'First Lieutenant'), None), ('Megatron', None, 'None', 10, 5000000, 5.699999809265137, ['Megatron'], None, '1980/04/10', '2012/05/10', [None, 5700.0], datetime.date(2012, 5, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'None'), None), ('Metroplex_)^$', '300', 'Battle Station', 8, 5000000, None, ['Metroflex'], None, '1980/04/10', '2011/04/10', [91.44000244140625, None], datetime.date(2011, 4, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Battle Station'), None), (None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None)])
assert (expected_df.collect() == actual_df.collect())
@staticmethod
def test_cols_trim_all_columns():
actual_df =source_df.cols.trim('*')
expected_df = op.create.df([('names', StringType(), True),('height(ft)', StringType(), True),('function', StringType(), True),('rank', StringType(), True),('age', StringType(), True),('weight(t)', StringType(), True),('japanese name', ArrayType(StringType(),True), True),('last position seen', StringType(), True),('date arrival', StringType(), True),('last date seen', StringType(), True),('attributes', ArrayType(FloatType(),True), True),('Date Type', StringType(), True),('timestamp', TimestampType(), True),('Cybertronian', StringType(), True),('function(binary)', BinaryType(), True),('NullType', NullType(), True)], [("Optim'us", '-28', 'Leader', '10', '5000000', '4.3', ['Inochi', 'Convoy'], '19.442735,-99.201111', '1980/04/10', '2016/09/10', [8.53439998626709, 4300.0], '2016-09-10', datetime.datetime(2014, 6, 24, 0, 0), 'true', bytearray(b'Leader'), None), ('bumbl#ebéé', '17', 'Espionage', '7', '5000000', '2.0', ['Bumble', 'Goldback'], '10.642707,-71.612534', '1980/04/10', '2015/08/10', [5.334000110626221, 2000.0], '2015-08-10', datetime.datetime(2014, 6, 24, 0, 0), 'true', bytearray(b'Espionage'), None), ('ironhide&', '26', 'Security', '7', '5000000', '4.0', ['Roadbuster'], '37.789563,-122.400356', '1980/04/10', '2014/07/10', [7.924799919128418, 4000.0], '2014-06-24', datetime.datetime(2014, 6, 24, 0, 0), 'true', bytearray(b'Security'), None), ('Jazz', '13', 'First Lieutenant', '8', '5000000', '1.8', ['Meister'], '33.670666,-117.841553', '1980/04/10', '2013/06/10', [3.962399959564209, 1800.0], '2013-06-24', datetime.datetime(2014, 6, 24, 0, 0), 'true', bytearray(b'First Lieutenant'), None), ('Megatron', None, 'None', '10', '5000000', '5.7', ['Megatron'], None, '1980/04/10', '2012/05/10', [None, 5700.0], '2012-05-10', datetime.datetime(2014, 6, 24, 0, 0), 'true', bytearray(b'None'), None), ('Metroplex_)^$', '300', 'Battle Station', '8', '5000000', None, ['Metroflex'], None, '1980/04/10', '2011/04/10', [91.44000244140625, None], '2011-04-10', datetime.datetime(2014, 6, 24, 0, 0), 'true', bytearray(b'Battle Station'), None), (None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None)])
assert (expected_df.collect() == actual_df.collect())
@staticmethod
def test_cols_unique():
actual_df =source_df.cols.unique('height(ft)')
actual_df =json_enconding(actual_df)
expected_value =json_enconding({'height(ft)': [300, 26, None, 13, 17, -28]})
assert(expected_value == actual_df)
@staticmethod
def test_cols_unique_all_columns():
actual_df =source_df.cols.unique('*')
actual_df =json_enconding(actual_df)
expected_value =json_enconding({'names': ['Jazz', None, 'bumbl#ebéé ', 'ironhide&', "Optim'us", 'Megatron', 'Metroplex_)^$'], 'height(ft)': [300, 26, None, 13, 17, -28], 'function': ['Leader', 'First Lieutenant', 'None', 'Security', None, 'Espionage', 'Battle Station'], 'rank': [None, 8, 7, 10], 'age': [None, 5000000], 'weight(t)': [5.699999809265137, None, 2.0, 1.7999999523162842, 4.0, 4.300000190734863], 'japanese name': [['Metroflex'], ['Bumble', 'Goldback'], None, ['Inochi', 'Convoy'], ['Megatron'], ['Meister'], ['Roadbuster']], 'last position seen': [None, '37.789563,-122.400356', '19.442735,-99.201111', '33.670666,-117.841553', '10.642707,-71.612534'], 'date arrival': [None, '1980/04/10'], 'last date seen': ['2011/04/10', None, '2012/05/10', '2013/06/10', '2015/08/10', '2014/07/10', '2016/09/10'], 'attributes': [[3.962399959564209, 1800.0], [None, 5700.0], None, [8.53439998626709, 4300.0], [7.924799919128418, 4000.0], [91.44000244140625, None], [5.334000110626221, 2000.0]], 'Date Type': [datetime.date(2012, 5, 10), datetime.date(2015, 8, 10), None, datetime.date(2011, 4, 10), datetime.date(2013, 6, 24), datetime.date(2014, 6, 24), datetime.date(2016, 9, 10)], 'timestamp': [datetime.datetime(2014, 6, 24, 0, 0), None], 'Cybertronian': [None, True], 'function(binary)': [bytearray(b'Leader'), bytearray(b'First Lieutenant'), bytearray(b'None'), bytearray(b'Security'), None, bytearray(b'Espionage'), bytearray(b'Battle Station')], 'NullType': [None]})
assert(expected_value == actual_df)
@staticmethod
def test_cols_unnest_array():
actual_df =source_df.cols.unnest('attributes')
expected_df = op.create.df([('names', StringType(), True),('height(ft)', ShortType(), True),('function', StringType(), True),('rank', ByteType(), True),('age', IntegerType(), True),('weight(t)', FloatType(), True),('japanese name', ArrayType(StringType(),True), True),('last position seen', StringType(), True),('date arrival', StringType(), True),('last date seen', StringType(), True),('attributes', ArrayType(FloatType(),True), True),('Date Type', DateType(), True),('timestamp', TimestampType(), True),('Cybertronian', BooleanType(), True),('function(binary)', BinaryType(), True),('NullType', NullType(), True),('attributes_0', FloatType(), True),('attributes_1', FloatType(), True)], [("Optim'us", -28, 'Leader', 10, 5000000, 4.300000190734863, ['Inochi', 'Convoy'], '19.442735,-99.201111', '1980/04/10', '2016/09/10', [8.53439998626709, 4300.0], datetime.date(2016, 9, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Leader'), None, 8.53439998626709, 4300.0), ('bumbl#ebéé ', 17, 'Espionage', 7, 5000000, 2.0, ['Bumble', 'Goldback'], '10.642707,-71.612534', '1980/04/10', '2015/08/10', [5.334000110626221, 2000.0], datetime.date(2015, 8, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Espionage'), None, 5.334000110626221, 2000.0), ('ironhide&', 26, 'Security', 7, 5000000, 4.0, ['Roadbuster'], '37.789563,-122.400356', '1980/04/10', '2014/07/10', [7.924799919128418, 4000.0], datetime.date(2014, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Security'), None, 7.924799919128418, 4000.0), ('Jazz', 13, 'First Lieutenant', 8, 5000000, 1.7999999523162842, ['Meister'], '33.670666,-117.841553', '1980/04/10', '2013/06/10', [3.962399959564209, 1800.0], datetime.date(2013, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'First Lieutenant'), None, 3.962399959564209, 1800.0), ('Megatron', None, 'None', 10, 5000000, 5.699999809265137, ['Megatron'], None, '1980/04/10', '2012/05/10', [None, 5700.0], datetime.date(2012, 5, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'None'), None, None, 5700.0), ('Metroplex_)^$', 300, 'Battle Station', 8, 5000000, None, ['Metroflex'], None, '1980/04/10', '2011/04/10', [91.44000244140625, None], datetime.date(2011, 4, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Battle Station'), None, 91.44000244140625, None), (None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None)])
assert (expected_df.collect() == actual_df.collect())
@staticmethod
def test_cols_unnest_array_all_columns():
actual_df =source_df.cols.unnest('attributes',index=[1, 2])
expected_df = op.create.df([('names', StringType(), True),('height(ft)', ShortType(), True),('function', StringType(), True),('rank', ByteType(), True),('age', IntegerType(), True),('weight(t)', FloatType(), True),('japanese name', ArrayType(StringType(),True), True),('last position seen', StringType(), True),('date arrival', StringType(), True),('last date seen', StringType(), True),('attributes', ArrayType(FloatType(),True), True),('Date Type', DateType(), True),('timestamp', TimestampType(), True),('Cybertronian', BooleanType(), True),('function(binary)', BinaryType(), True),('NullType', NullType(), True),('attributes_0', FloatType(), True),('attributes_1', FloatType(), True)], [("Optim'us", -28, 'Leader', 10, 5000000, 4.300000190734863, ['Inochi', 'Convoy'], '19.442735,-99.201111', '1980/04/10', '2016/09/10', [8.53439998626709, 4300.0], datetime.date(2016, 9, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Leader'), None, 8.53439998626709, 4300.0), ('bumbl#ebéé ', 17, 'Espionage', 7, 5000000, 2.0, ['Bumble', 'Goldback'], '10.642707,-71.612534', '1980/04/10', '2015/08/10', [5.334000110626221, 2000.0], datetime.date(2015, 8, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Espionage'), None, 5.334000110626221, 2000.0), ('ironhide&', 26, 'Security', 7, 5000000, 4.0, ['Roadbuster'], '37.789563,-122.400356', '1980/04/10', '2014/07/10', [7.924799919128418, 4000.0], datetime.date(2014, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Security'), None, 7.924799919128418, 4000.0), ('Jazz', 13, 'First Lieutenant', 8, 5000000, 1.7999999523162842, ['Meister'], '33.670666,-117.841553', '1980/04/10', '2013/06/10', [3.962399959564209, 1800.0], datetime.date(2013, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'First Lieutenant'), None, 3.962399959564209, 1800.0), ('Megatron', None, 'None', 10, 5000000, 5.699999809265137, ['Megatron'], None, '1980/04/10', '2012/05/10', [None, 5700.0], datetime.date(2012, 5, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'None'), None, None, 5700.0), ('Metroplex_)^$', 300, 'Battle Station', 8, 5000000, None, ['Metroflex'], None, '1980/04/10', '2011/04/10', [91.44000244140625, None], datetime.date(2011, 4, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Battle Station'), None, 91.44000244140625, None), (None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None)])
assert (expected_df.collect() == actual_df.collect())
@staticmethod
def test_cols_unnest_array_index():
actual_df =source_df.cols.unnest('attributes',index=2)
expected_df = op.create.df([('names', StringType(), True),('height(ft)', ShortType(), True),('function', StringType(), True),('rank', ByteType(), True),('age', IntegerType(), True),('weight(t)', FloatType(), True),('japanese name', ArrayType(StringType(),True), True),('last position seen', StringType(), True),('date arrival', StringType(), True),('last date seen', StringType(), True),('attributes', ArrayType(FloatType(),True), True),('Date Type', DateType(), True),('timestamp', TimestampType(), True),('Cybertronian', BooleanType(), True),('function(binary)', BinaryType(), True),('NullType', NullType(), True),('attributes_1', FloatType(), True)], [("Optim'us", -28, 'Leader', 10, 5000000, 4.300000190734863, ['Inochi', 'Convoy'], '19.442735,-99.201111', '1980/04/10', '2016/09/10', [8.53439998626709, 4300.0], datetime.date(2016, 9, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Leader'), None, 4300.0), ('bumbl#ebéé ', 17, 'Espionage', 7, 5000000, 2.0, ['Bumble', 'Goldback'], '10.642707,-71.612534', '1980/04/10', '2015/08/10', [5.334000110626221, 2000.0], datetime.date(2015, 8, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Espionage'), None, 2000.0), ('ironhide&', 26, 'Security', 7, 5000000, 4.0, ['Roadbuster'], '37.789563,-122.400356', '1980/04/10', '2014/07/10', [7.924799919128418, 4000.0], datetime.date(2014, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Security'), None, 4000.0), ('Jazz', 13, 'First Lieutenant', 8, 5000000, 1.7999999523162842, ['Meister'], '33.670666,-117.841553', '1980/04/10', '2013/06/10', [3.962399959564209, 1800.0], datetime.date(2013, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'First Lieutenant'), None, 1800.0), ('Megatron', None, 'None', 10, 5000000, 5.699999809265137, ['Megatron'], None, '1980/04/10', '2012/05/10', [None, 5700.0], datetime.date(2012, 5, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'None'), None, 5700.0), ('Metroplex_)^$', 300, 'Battle Station', 8, 5000000, None, ['Metroflex'], None, '1980/04/10', '2011/04/10', [91.44000244140625, None], datetime.date(2011, 4, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Battle Station'), None, None), (None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None)])
assert (expected_df.collect() == actual_df.collect())
@staticmethod
def test_cols_unnest_array_mmulti_index():
actual_df =source_df.cols.unnest('attributes',index=[1, 2])
expected_df = op.create.df([('names', StringType(), True),('height(ft)', ShortType(), True),('function', StringType(), True),('rank', ByteType(), True),('age', IntegerType(), True),('weight(t)', FloatType(), True),('japanese name', ArrayType(StringType(),True), True),('last position seen', StringType(), True),('date arrival', StringType(), True),('last date seen', StringType(), True),('attributes', ArrayType(FloatType(),True), True),('Date Type', DateType(), True),('timestamp', TimestampType(), True),('Cybertronian', BooleanType(), True),('function(binary)', BinaryType(), True),('NullType', NullType(), True),('attributes_0', FloatType(), True),('attributes_1', FloatType(), True)], [("Optim'us", -28, 'Leader', 10, 5000000, 4.300000190734863, ['Inochi', 'Convoy'], '19.442735,-99.201111', '1980/04/10', '2016/09/10', [8.53439998626709, 4300.0], datetime.date(2016, 9, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Leader'), None, 8.53439998626709, 4300.0), ('bumbl#ebéé ', 17, 'Espionage', 7, 5000000, 2.0, ['Bumble', 'Goldback'], '10.642707,-71.612534', '1980/04/10', '2015/08/10', [5.334000110626221, 2000.0], datetime.date(2015, 8, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Espionage'), None, 5.334000110626221, 2000.0), ('ironhide&', 26, 'Security', 7, 5000000, 4.0, ['Roadbuster'], '37.789563,-122.400356', '1980/04/10', '2014/07/10', [7.924799919128418, 4000.0], datetime.date(2014, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Security'), None, 7.924799919128418, 4000.0), ('Jazz', 13, 'First Lieutenant', 8, 5000000, 1.7999999523162842, ['Meister'], '33.670666,-117.841553', '1980/04/10', '2013/06/10', [3.962399959564209, 1800.0], datetime.date(2013, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'First Lieutenant'), None, 3.962399959564209, 1800.0), ('Megatron', None, 'None', 10, 5000000, 5.699999809265137, ['Megatron'], None, '1980/04/10', '2012/05/10', [None, 5700.0], datetime.date(2012, 5, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'None'), None, None, 5700.0), ('Metroplex_)^$', 300, 'Battle Station', 8, 5000000, None, ['Metroflex'], None, '1980/04/10', '2011/04/10', [91.44000244140625, None], datetime.date(2011, 4, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Battle Station'), None, 91.44000244140625, None), (None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None)])
assert (expected_df.collect() == actual_df.collect())
@staticmethod
def test_cols_unnest_array_multi_index():
actual_df =source_df.cols.unnest('attributes',index=[1, 2])
expected_df = op.create.df([('names', StringType(), True),('height(ft)', ShortType(), True),('function', StringType(), True),('rank', ByteType(), True),('age', IntegerType(), True),('weight(t)', FloatType(), True),('japanese name', ArrayType(StringType(),True), True),('last position seen', StringType(), True),('date arrival', StringType(), True),('last date seen', StringType(), True),('attributes', ArrayType(FloatType(),True), True),('Date Type', DateType(), True),('timestamp', TimestampType(), True),('Cybertronian', BooleanType(), True),('function(binary)', BinaryType(), True),('NullType', NullType(), True),('attributes_0', FloatType(), True),('attributes_1', FloatType(), True)], [("Optim'us", -28, 'Leader', 10, 5000000, 4.300000190734863, ['Inochi', 'Convoy'], '19.442735,-99.201111', '1980/04/10', '2016/09/10', [8.53439998626709, 4300.0], datetime.date(2016, 9, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Leader'), None, 8.53439998626709, 4300.0), ('bumbl#ebéé ', 17, 'Espionage', 7, 5000000, 2.0, ['Bumble', 'Goldback'], '10.642707,-71.612534', '1980/04/10', '2015/08/10', [5.334000110626221, 2000.0], datetime.date(2015, 8, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Espionage'), None, 5.334000110626221, 2000.0), ('ironhide&', 26, 'Security', 7, 5000000, 4.0, ['Roadbuster'], '37.789563,-122.400356', '1980/04/10', '2014/07/10', [7.924799919128418, 4000.0], datetime.date(2014, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Security'), None, 7.924799919128418, 4000.0), ('Jazz', 13, 'First Lieutenant', 8, 5000000, 1.7999999523162842, ['Meister'], '33.670666,-117.841553', '1980/04/10', '2013/06/10', [3.962399959564209, 1800.0], datetime.date(2013, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'First Lieutenant'), None, 3.962399959564209, 1800.0), ('Megatron', None, 'None', 10, 5000000, 5.699999809265137, ['Megatron'], None, '1980/04/10', '2012/05/10', [None, 5700.0], datetime.date(2012, 5, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'None'), None, None, 5700.0), ('Metroplex_)^$', 300, 'Battle Station', 8, 5000000, None, ['Metroflex'], None, '1980/04/10', '2011/04/10', [91.44000244140625, None], datetime.date(2011, 4, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Battle Station'), None, 91.44000244140625, None), (None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None)])
assert (expected_df.collect() == actual_df.collect())
@staticmethod
def test_cols_unnest_string():
actual_df =source_df.cols.unnest('date arrival','/',splits=3,output_cols=[('year', 'month', 'day')])
expected_df = op.create.df([('names', StringType(), True),('height(ft)', ShortType(), True),('function', StringType(), True),('rank', ByteType(), True),('age', IntegerType(), True),('weight(t)', FloatType(), True),('japanese name', ArrayType(StringType(),True), True),('last position seen', StringType(), True),('date arrival', StringType(), True),('last date seen', StringType(), True),('attributes', ArrayType(FloatType(),True), True),('Date Type', DateType(), True),('timestamp', TimestampType(), True),('Cybertronian', BooleanType(), True),('function(binary)', BinaryType(), True),('NullType', NullType(), True),('year', StringType(), True),('month', StringType(), True),('day', StringType(), True)], [("Optim'us", -28, 'Leader', 10, 5000000, 4.300000190734863, ['Inochi', 'Convoy'], '19.442735,-99.201111', '1980/04/10', '2016/09/10', [8.53439998626709, 4300.0], datetime.date(2016, 9, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Leader'), None, '1980', '04', '10'), ('bumbl#ebéé ', 17, 'Espionage', 7, 5000000, 2.0, ['Bumble', 'Goldback'], '10.642707,-71.612534', '1980/04/10', '2015/08/10', [5.334000110626221, 2000.0], datetime.date(2015, 8, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Espionage'), None, '1980', '04', '10'), ('ironhide&', 26, 'Security', 7, 5000000, 4.0, ['Roadbuster'], '37.789563,-122.400356', '1980/04/10', '2014/07/10', [7.924799919128418, 4000.0], datetime.date(2014, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Security'), None, '1980', '04', '10'), ('Jazz', 13, 'First Lieutenant', 8, 5000000, 1.7999999523162842, ['Meister'], '33.670666,-117.841553', '1980/04/10', '2013/06/10', [3.962399959564209, 1800.0], datetime.date(2013, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'First Lieutenant'), None, '1980', '04', '10'), ('Megatron', None, 'None', 10, 5000000, 5.699999809265137, ['Megatron'], None, '1980/04/10', '2012/05/10', [None, 5700.0], datetime.date(2012, 5, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'None'), None, '1980', '04', '10'), ('Metroplex_)^$', 300, 'Battle Station', 8, 5000000, None, ['Metroflex'], None, '1980/04/10', '2011/04/10', [91.44000244140625, None], datetime.date(2011, 4, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Battle Station'), None, '1980', '04', '10'), (None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None)])
assert (expected_df.collect() == actual_df.collect())
@staticmethod
def test_cols_unnest_string_all_columns():
actual_df =source_df.cols.unnest('attributes',index=2)
expected_df = op.create.df([('names', StringType(), True),('height(ft)', ShortType(), True),('function', StringType(), True),('rank', ByteType(), True),('age', IntegerType(), True),('weight(t)', FloatType(), True),('japanese name', ArrayType(StringType(),True), True),('last position seen', StringType(), True),('date arrival', StringType(), True),('last date seen', StringType(), True),('attributes', ArrayType(FloatType(),True), True),('Date Type', DateType(), True),('timestamp', TimestampType(), True),('Cybertronian', BooleanType(), True),('function(binary)', BinaryType(), True),('NullType', NullType(), True),('attributes_1', FloatType(), True)], [("Optim'us", -28, 'Leader', 10, 5000000, 4.300000190734863, ['Inochi', 'Convoy'], '19.442735,-99.201111', '1980/04/10', '2016/09/10', [8.53439998626709, 4300.0], datetime.date(2016, 9, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Leader'), None, 4300.0), ('bumbl#ebéé ', 17, 'Espionage', 7, 5000000, 2.0, ['Bumble', 'Goldback'], '10.642707,-71.612534', '1980/04/10', '2015/08/10', [5.334000110626221, 2000.0], datetime.date(2015, 8, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Espionage'), None, 2000.0), ('ironhide&', 26, 'Security', 7, 5000000, 4.0, ['Roadbuster'], '37.789563,-122.400356', '1980/04/10', '2014/07/10', [7.924799919128418, 4000.0], datetime.date(2014, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Security'), None, 4000.0), ('Jazz', 13, 'First Lieutenant', 8, 5000000, 1.7999999523162842, ['Meister'], '33.670666,-117.841553', '1980/04/10', '2013/06/10', [3.962399959564209, 1800.0], datetime.date(2013, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'First Lieutenant'), None, 1800.0), ('Megatron', None, 'None', 10, 5000000, 5.699999809265137, ['Megatron'], None, '1980/04/10', '2012/05/10', [None, 5700.0], datetime.date(2012, 5, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'None'), None, 5700.0), ('Metroplex_)^$', 300, 'Battle Station', 8, 5000000, None, ['Metroflex'], None, '1980/04/10', '2011/04/10', [91.44000244140625, None], datetime.date(2011, 4, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Battle Station'), None, None), (None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None)])
assert (expected_df.collect() == actual_df.collect())
@staticmethod
def test_cols_unnest_string_index():
actual_df =source_df.cols.unnest('date arrival','/',splits=3,index=[1, 2, 3])
expected_df = op.create.df([('names', StringType(), True),('height(ft)', ShortType(), True),('function', StringType(), True),('rank', ByteType(), True),('age', IntegerType(), True),('weight(t)', FloatType(), True),('japanese name', ArrayType(StringType(),True), True),('last position seen', StringType(), True),('date arrival', StringType(), True),('last date seen', StringType(), True),('attributes', ArrayType(FloatType(),True), True),('Date Type', DateType(), True),('timestamp', TimestampType(), True),('Cybertronian', BooleanType(), True),('function(binary)', BinaryType(), True),('NullType', NullType(), True),('date arrival_1', StringType(), True),('date arrival_2', StringType(), True),('date arrival_3', StringType(), True)], [("Optim'us", -28, 'Leader', 10, 5000000, 4.300000190734863, ['Inochi', 'Convoy'], '19.442735,-99.201111', '1980/04/10', '2016/09/10', [8.53439998626709, 4300.0], datetime.date(2016, 9, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Leader'), None, '1980', '04', '10'), ('bumbl#ebéé ', 17, 'Espionage', 7, 5000000, 2.0, ['Bumble', 'Goldback'], '10.642707,-71.612534', '1980/04/10', '2015/08/10', [5.334000110626221, 2000.0], datetime.date(2015, 8, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Espionage'), None, '1980', '04', '10'), ('ironhide&', 26, 'Security', 7, 5000000, 4.0, ['Roadbuster'], '37.789563,-122.400356', '1980/04/10', '2014/07/10', [7.924799919128418, 4000.0], datetime.date(2014, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Security'), None, '1980', '04', '10'), ('Jazz', 13, 'First Lieutenant', 8, 5000000, 1.7999999523162842, ['Meister'], '33.670666,-117.841553', '1980/04/10', '2013/06/10', [3.962399959564209, 1800.0], datetime.date(2013, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'First Lieutenant'), None, '1980', '04', '10'), ('Megatron', None, 'None', 10, 5000000, 5.699999809265137, ['Megatron'], None, '1980/04/10', '2012/05/10', [None, 5700.0], datetime.date(2012, 5, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'None'), None, '1980', '04', '10'), ('Metroplex_)^$', 300, 'Battle Station', 8, 5000000, None, ['Metroflex'], None, '1980/04/10', '2011/04/10', [91.44000244140625, None], datetime.date(2011, 4, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Battle Station'), None, '1980', '04', '10'), (None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None)])
assert (expected_df.collect() == actual_df.collect())
@staticmethod
def test_cols_unnest_string_infer_split():
actual_df =source_df.cols.unnest('date arrival','/')
expected_df = op.create.df([('names', StringType(), True),('height(ft)', ShortType(), True),('function', StringType(), True),('rank', ByteType(), True),('age', IntegerType(), True),('weight(t)', FloatType(), True),('japanese name', ArrayType(StringType(),True), True),('last position seen', StringType(), True),('date arrival', StringType(), True),('last date seen', StringType(), True),('attributes', ArrayType(FloatType(),True), True),('Date Type', DateType(), True),('timestamp', TimestampType(), True),('Cybertronian', BooleanType(), True),('function(binary)', BinaryType(), True),('NullType', NullType(), True),('date arrival_0', StringType(), True),('date arrival_1', StringType(), True),('date arrival_2', StringType(), True)], [("Optim'us", -28, 'Leader', 10, 5000000, 4.300000190734863, ['Inochi', 'Convoy'], '19.442735,-99.201111', '1980/04/10', '2016/09/10', [8.53439998626709, 4300.0], datetime.date(2016, 9, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Leader'), None, '1980', '04', '10'), ('bumbl#ebéé ', 17, 'Espionage', 7, 5000000, 2.0, ['Bumble', 'Goldback'], '10.642707,-71.612534', '1980/04/10', '2015/08/10', [5.334000110626221, 2000.0], datetime.date(2015, 8, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Espionage'), None, '1980', '04', '10'), ('ironhide&', 26, 'Security', 7, 5000000, 4.0, ['Roadbuster'], '37.789563,-122.400356', '1980/04/10', '2014/07/10', [7.924799919128418, 4000.0], datetime.date(2014, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Security'), None, '1980', '04', '10'), ('Jazz', 13, 'First Lieutenant', 8, 5000000, 1.7999999523162842, ['Meister'], '33.670666,-117.841553', '1980/04/10', '2013/06/10', [3.962399959564209, 1800.0], datetime.date(2013, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'First Lieutenant'), None, '1980', '04', '10'), ('Megatron', None, 'None', 10, 5000000, 5.699999809265137, ['Megatron'], None, '1980/04/10', '2012/05/10', [None, 5700.0], datetime.date(2012, 5, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'None'), None, '1980', '04', '10'), ('Metroplex_)^$', 300, 'Battle Station', 8, 5000000, None, ['Metroflex'], None, '1980/04/10', '2011/04/10', [91.44000244140625, None], datetime.date(2011, 4, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Battle Station'), None, '1980', '04', '10'), (None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None)])
assert (expected_df.collect() == actual_df.collect())
@staticmethod
def test_cols_unnest_string_multi_colum_multi_output():
actual_df =source_df.cols.unnest(['date arrival','last date seen'],'/',output_cols=[('year1', 'month1'), ('year2', 'month2')])
expected_df = op.create.df([('names', StringType(), True),('height(ft)', ShortType(), True),('function', StringType(), True),('rank', ByteType(), True),('age', IntegerType(), True),('weight(t)', FloatType(), True),('japanese name', ArrayType(StringType(),True), True),('last position seen', StringType(), True),('date arrival', StringType(), True),('last date seen', StringType(), True),('attributes', ArrayType(FloatType(),True), True),('Date Type', DateType(), True),('timestamp', TimestampType(), True),('Cybertronian', BooleanType(), True),('function(binary)', BinaryType(), True),('NullType', NullType(), True),('year1', StringType(), True),('month1', StringType(), True),('year2', StringType(), True),('month2', StringType(), True)], [("Optim'us", -28, 'Leader', 10, 5000000, 4.300000190734863, ['Inochi', 'Convoy'], '19.442735,-99.201111', '1980/04/10', '2016/09/10', [8.53439998626709, 4300.0], datetime.date(2016, 9, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Leader'), None, '1980', '04', '2016', '09'), ('bumbl#ebéé ', 17, 'Espionage', 7, 5000000, 2.0, ['Bumble', 'Goldback'], '10.642707,-71.612534', '1980/04/10', '2015/08/10', [5.334000110626221, 2000.0], datetime.date(2015, 8, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Espionage'), None, '1980', '04', '2015', '08'), ('ironhide&', 26, 'Security', 7, 5000000, 4.0, ['Roadbuster'], '37.789563,-122.400356', '1980/04/10', '2014/07/10', [7.924799919128418, 4000.0], datetime.date(2014, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Security'), None, '1980', '04', '2014', '07'), ('Jazz', 13, 'First Lieutenant', 8, 5000000, 1.7999999523162842, ['Meister'], '33.670666,-117.841553', '1980/04/10', '2013/06/10', [3.962399959564209, 1800.0], datetime.date(2013, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'First Lieutenant'), None, '1980', '04', '2013', '06'), ('Megatron', None, 'None', 10, 5000000, 5.699999809265137, ['Megatron'], None, '1980/04/10', '2012/05/10', [None, 5700.0], datetime.date(2012, 5, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'None'), None, '1980', '04', '2012', '05'), ('Metroplex_)^$', 300, 'Battle Station', 8, 5000000, None, ['Metroflex'], None, '1980/04/10', '2011/04/10', [91.44000244140625, None], datetime.date(2011, 4, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Battle Station'), None, '1980', '04', '2011', '04'), (None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None)])
assert (expected_df.collect() == actual_df.collect())
@staticmethod
def test_cols_unnest_string_multi_index():
actual_df =source_df.cols.unnest('date arrival','/',splits=3,index=[1, 2])
expected_df = op.create.df([('names', StringType(), True),('height(ft)', ShortType(), True),('function', StringType(), True),('rank', ByteType(), True),('age', IntegerType(), True),('weight(t)', FloatType(), True),('japanese name', ArrayType(StringType(),True), True),('last position seen', StringType(), True),('date arrival', StringType(), True),('last date seen', StringType(), True),('attributes', ArrayType(FloatType(),True), True),('Date Type', DateType(), True),('timestamp', TimestampType(), True),('Cybertronian', BooleanType(), True),('function(binary)', BinaryType(), True),('NullType', NullType(), True),('date arrival_0', StringType(), True),('date arrival_1', StringType(), True)], [("Optim'us", -28, 'Leader', 10, 5000000, 4.300000190734863, ['Inochi', 'Convoy'], '19.442735,-99.201111', '1980/04/10', '2016/09/10', [8.53439998626709, 4300.0], datetime.date(2016, 9, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Leader'), None, '1980', '04'), ('bumbl#ebéé ', 17, 'Espionage', 7, 5000000, 2.0, ['Bumble', 'Goldback'], '10.642707,-71.612534', '1980/04/10', '2015/08/10', [5.334000110626221, 2000.0], datetime.date(2015, 8, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Espionage'), None, '1980', '04'), ('ironhide&', 26, 'Security', 7, 5000000, 4.0, ['Roadbuster'], '37.789563,-122.400356', '1980/04/10', '2014/07/10', [7.924799919128418, 4000.0], datetime.date(2014, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Security'), None, '1980', '04'), ('Jazz', 13, 'First Lieutenant', 8, 5000000, 1.7999999523162842, ['Meister'], '33.670666,-117.841553', '1980/04/10', '2013/06/10', [3.962399959564209, 1800.0], datetime.date(2013, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'First Lieutenant'), None, '1980', '04'), ('Megatron', None, 'None', 10, 5000000, 5.699999809265137, ['Megatron'], None, '1980/04/10', '2012/05/10', [None, 5700.0], datetime.date(2012, 5, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'None'), None, '1980', '04'), ('Metroplex_)^$', 300, 'Battle Station', 8, 5000000, None, ['Metroflex'], None, '1980/04/10', '2011/04/10', [91.44000244140625, None], datetime.date(2011, 4, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Battle Station'), None, '1980', '04'), (None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None)])
assert (expected_df.collect() == actual_df.collect())
@staticmethod
def test_cols_unnest_string_no_index():
actual_df =source_df.cols.unnest('date arrival','/',splits=3)
expected_df = op.create.df([('names', StringType(), True),('height(ft)', ShortType(), True),('function', StringType(), True),('rank', ByteType(), True),('age', IntegerType(), True),('weight(t)', FloatType(), True),('japanese name', ArrayType(StringType(),True), True),('last position seen', StringType(), True),('date arrival', StringType(), True),('last date seen', StringType(), True),('attributes', ArrayType(FloatType(),True), True),('Date Type', DateType(), True),('timestamp', TimestampType(), True),('Cybertronian', BooleanType(), True),('function(binary)', BinaryType(), True),('NullType', NullType(), True),('date arrival_0', StringType(), True),('date arrival_1', StringType(), True),('date arrival_2', StringType(), True)], [("Optim'us", -28, 'Leader', 10, 5000000, 4.300000190734863, ['Inochi', 'Convoy'], '19.442735,-99.201111', '1980/04/10', '2016/09/10', [8.53439998626709, 4300.0], datetime.date(2016, 9, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Leader'), None, '1980', '04', '10'), ('bumbl#ebéé ', 17, 'Espionage', 7, 5000000, 2.0, ['Bumble', 'Goldback'], '10.642707,-71.612534', '1980/04/10', '2015/08/10', [5.334000110626221, 2000.0], datetime.date(2015, 8, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Espionage'), None, '1980', '04', '10'), ('ironhide&', 26, 'Security', 7, 5000000, 4.0, ['Roadbuster'], '37.789563,-122.400356', '1980/04/10', '2014/07/10', [7.924799919128418, 4000.0], datetime.date(2014, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Security'), None, '1980', '04', '10'), ('Jazz', 13, 'First Lieutenant', 8, 5000000, 1.7999999523162842, ['Meister'], '33.670666,-117.841553', '1980/04/10', '2013/06/10', [3.962399959564209, 1800.0], datetime.date(2013, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'First Lieutenant'), None, '1980', '04', '10'), ('Megatron', None, 'None', 10, 5000000, 5.699999809265137, ['Megatron'], None, '1980/04/10', '2012/05/10', [None, 5700.0], datetime.date(2012, 5, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'None'), None, '1980', '04', '10'), ('Metroplex_)^$', 300, 'Battle Station', 8, 5000000, None, ['Metroflex'], None, '1980/04/10', '2011/04/10', [91.44000244140625, None], datetime.date(2011, 4, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Battle Station'), None, '1980', '04', '10'), (None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None)])
assert (expected_df.collect() == actual_df.collect())
@staticmethod
def test_cols_unnest_string_output_columns():
actual_df =source_df.cols.unnest('date arrival','/',splits=3,output_cols=[('year', 'month', 'day')])
expected_df = op.create.df([('names', StringType(), True),('height(ft)', ShortType(), True),('function', StringType(), True),('rank', ByteType(), True),('age', IntegerType(), True),('weight(t)', FloatType(), True),('japanese name', ArrayType(StringType(),True), True),('last position seen', StringType(), True),('date arrival', StringType(), True),('last date seen', StringType(), True),('attributes', ArrayType(FloatType(),True), True),('Date Type', DateType(), True),('timestamp', TimestampType(), True),('Cybertronian', BooleanType(), True),('function(binary)', BinaryType(), True),('NullType', NullType(), True),('year', StringType(), True),('month', StringType(), True),('day', StringType(), True)], [("Optim'us", -28, 'Leader', 10, 5000000, 4.300000190734863, ['Inochi', 'Convoy'], '19.442735,-99.201111', '1980/04/10', '2016/09/10', [8.53439998626709, 4300.0], datetime.date(2016, 9, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Leader'), None, '1980', '04', '10'), ('bumbl#ebéé ', 17, 'Espionage', 7, 5000000, 2.0, ['Bumble', 'Goldback'], '10.642707,-71.612534', '1980/04/10', '2015/08/10', [5.334000110626221, 2000.0], datetime.date(2015, 8, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Espionage'), None, '1980', '04', '10'), ('ironhide&', 26, 'Security', 7, 5000000, 4.0, ['Roadbuster'], '37.789563,-122.400356', '1980/04/10', '2014/07/10', [7.924799919128418, 4000.0], datetime.date(2014, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Security'), None, '1980', '04', '10'), ('Jazz', 13, 'First Lieutenant', 8, 5000000, 1.7999999523162842, ['Meister'], '33.670666,-117.841553', '1980/04/10', '2013/06/10', [3.962399959564209, 1800.0], datetime.date(2013, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'First Lieutenant'), None, '1980', '04', '10'), ('Megatron', None, 'None', 10, 5000000, 5.699999809265137, ['Megatron'], None, '1980/04/10', '2012/05/10', [None, 5700.0], datetime.date(2012, 5, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'None'), None, '1980', '04', '10'), ('Metroplex_)^$', 300, 'Battle Station', 8, 5000000, None, ['Metroflex'], None, '1980/04/10', '2011/04/10', [91.44000244140625, None], datetime.date(2011, 4, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Battle Station'), None, '1980', '04', '10'), (None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None)])
assert (expected_df.collect() == actual_df.collect())
@staticmethod
def test_cols_upper():
actual_df =source_df.cols.upper('function')
expected_df = op.create.df([('names', StringType(), True),('height(ft)', ShortType(), True),('function', StringType(), True),('rank', ByteType(), True),('age', IntegerType(), True),('weight(t)', FloatType(), True),('japanese name', ArrayType(StringType(),True), True),('last position seen', StringType(), True),('date arrival', StringType(), True),('last date seen', StringType(), True),('attributes', ArrayType(FloatType(),True), True),('Date Type', DateType(), True),('timestamp', TimestampType(), True),('Cybertronian', BooleanType(), True),('function(binary)', BinaryType(), True),('NullType', NullType(), True)], [("Optim'us", -28, 'LEADER', 10, 5000000, 4.300000190734863, ['Inochi', 'Convoy'], '19.442735,-99.201111', '1980/04/10', '2016/09/10', [8.53439998626709, 4300.0], datetime.date(2016, 9, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Leader'), None), ('bumbl#ebéé ', 17, 'ESPIONAGE', 7, 5000000, 2.0, ['Bumble', 'Goldback'], '10.642707,-71.612534', '1980/04/10', '2015/08/10', [5.334000110626221, 2000.0], datetime.date(2015, 8, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Espionage'), None), ('ironhide&', 26, 'SECURITY', 7, 5000000, 4.0, ['Roadbuster'], '37.789563,-122.400356', '1980/04/10', '2014/07/10', [7.924799919128418, 4000.0], datetime.date(2014, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Security'), None), ('Jazz', 13, 'FIRST LIEUTENANT', 8, 5000000, 1.7999999523162842, ['Meister'], '33.670666,-117.841553', '1980/04/10', '2013/06/10', [3.962399959564209, 1800.0], datetime.date(2013, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'First Lieutenant'), None), ('Megatron', None, 'NONE', 10, 5000000, 5.699999809265137, ['Megatron'], None, '1980/04/10', '2012/05/10', [None, 5700.0], datetime.date(2012, 5, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'None'), None), ('Metroplex_)^$', 300, 'BATTLE STATION', 8, 5000000, None, ['Metroflex'], None, '1980/04/10', '2011/04/10', [91.44000244140625, None], datetime.date(2011, 4, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Battle Station'), None), (None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None)])
assert (expected_df.collect() == actual_df.collect())
@staticmethod
def test_cols_upper_all_columns():
actual_df =source_df.cols.upper('*')
expected_df = op.create.df([('names', StringType(), True),('height(ft)', ShortType(), True),('function', StringType(), True),('rank', ByteType(), True),('age', IntegerType(), True),('weight(t)', FloatType(), True),('japanese name', ArrayType(StringType(),True), True),('last position seen', StringType(), True),('date arrival', StringType(), True),('last date seen', StringType(), True),('attributes', ArrayType(FloatType(),True), True),('Date Type', DateType(), True),('timestamp', TimestampType(), True),('Cybertronian', BooleanType(), True),('function(binary)', BinaryType(), True),('NullType', NullType(), True)], [("OPTIM'US", -28, 'LEADER', 10, 5000000, 4.300000190734863, ['Inochi', 'Convoy'], '19.442735,-99.201111', '1980/04/10', '2016/09/10', [8.53439998626709, 4300.0], datetime.date(2016, 9, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Leader'), None), ('BUMBL#EBÉÉ ', 17, 'ESPIONAGE', 7, 5000000, 2.0, ['Bumble', 'Goldback'], '10.642707,-71.612534', '1980/04/10', '2015/08/10', [5.334000110626221, 2000.0], datetime.date(2015, 8, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Espionage'), None), ('IRONHIDE&', 26, 'SECURITY', 7, 5000000, 4.0, ['Roadbuster'], '37.789563,-122.400356', '1980/04/10', '2014/07/10', [7.924799919128418, 4000.0], datetime.date(2014, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Security'), None), ('JAZZ', 13, 'FIRST LIEUTENANT', 8, 5000000, 1.7999999523162842, ['Meister'], '33.670666,-117.841553', '1980/04/10', '2013/06/10', [3.962399959564209, 1800.0], datetime.date(2013, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'First Lieutenant'), None), ('MEGATRON', None, 'NONE', 10, 5000000, 5.699999809265137, ['Megatron'], None, '1980/04/10', '2012/05/10', [None, 5700.0], datetime.date(2012, 5, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'None'), None), ('METROPLEX_)^$', 300, 'BATTLE STATION', 8, 5000000, None, ['Metroflex'], None, '1980/04/10', '2011/04/10', [91.44000244140625, None], datetime.date(2011, 4, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Battle Station'), None), (None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None)])
assert (expected_df.collect() == actual_df.collect())
@staticmethod
def test_cols_values_to_cols():
source_df=op.create.df([('names', StringType(), True),('height(ft)', ShortType(), True),('function', StringType(), True),('rank', ByteType(), True),('age', IntegerType(), True),('weight(t)', FloatType(), True),('japanese name', ArrayType(StringType(),True), True),('last position seen', StringType(), True),('date arrival', StringType(), True),('last date seen', StringType(), True),('attributes', ArrayType(FloatType(),True), True),('Date Type', DateType(), True),('timestamp', TimestampType(), True),('Cybertronian', BooleanType(), True)], [("Optim'us", -28, 'Leader', 10, 5000000, 4.300000190734863, ['Inochi', 'Convoy'], '19.442735,-99.201111', '1980/04/10', '2016/09/10', [8.53439998626709, 4300.0], datetime.date(2016, 9, 10), datetime.datetime(2014, 6, 24, 0, 0), True), ('bumbl#ebéé ', 17, 'Espionage', 7, 5000000, 2.0, ['Bumble', 'Goldback'], '10.642707,-71.612534', '1980/04/10', '2015/08/10', [5.334000110626221, 2000.0], datetime.date(2015, 8, 10), datetime.datetime(2014, 6, 24, 0, 0), True), ('ironhide&', 26, 'Security', 7, 5000000, 4.0, ['Roadbuster'], '37.789563,-122.400356', '1980/04/10', '2014/07/10', [7.924799919128418, 4000.0], datetime.date(2014, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True), ('Jazz', 13, 'First Lieutenant', 8, 5000000, 1.7999999523162842, ['Meister'], '33.670666,-117.841553', '1980/04/10', '2013/06/10', [3.962399959564209, 1800.0], datetime.date(2013, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True), ('Megatron', None, 'None', 10, 5000000, 5.699999809265137, ['Megatron'], None, '1980/04/10', '2012/05/10', [None, 5700.0], datetime.date(2012, 5, 10), datetime.datetime(2014, 6, 24, 0, 0), True), ('Metroplex_)^$', 300, 'Battle Station', 8, 5000000, None, ['Metroflex'], None, '1980/04/10', '2011/04/10', [91.44000244140625, None], datetime.date(2011, 4, 10), datetime.datetime(2014, 6, 24, 0, 0), True)])
actual_df =source_df.cols.values_to_cols('rank')
expected_df = op.create.df([('names', StringType(), True),('height(ft)', ShortType(), True),('function', StringType(), True),('rank', ByteType(), True),('age', IntegerType(), True),('weight(t)', FloatType(), True),('japanese name', ArrayType(StringType(),True), True),('last position seen', StringType(), True),('date arrival', StringType(), True),('last date seen', StringType(), True),('attributes', ArrayType(FloatType(),True), True),('Date Type', DateType(), True),('timestamp', TimestampType(), True),('Cybertronian', BooleanType(), True),('rank_7', DoubleType(), True),('rank_8', DoubleType(), True),('rank_10', DoubleType(), True)], [('Metroplex_)^$', 300, 'Battle Station', 8, 5000000, None, ['Metroflex'], None, '1980/04/10', '2011/04/10', [91.44000244140625, None], datetime.date(2011, 4, 10), datetime.datetime(2014, 6, 24, 0, 0), True, 0.0, 1.0, 0.0), ("Optim'us", -28, 'Leader', 10, 5000000, 4.300000190734863, ['Inochi', 'Convoy'], '19.442735,-99.201111', '1980/04/10', '2016/09/10', [8.53439998626709, 4300.0], datetime.date(2016, 9, 10), datetime.datetime(2014, 6, 24, 0, 0), True, 0.0, 0.0, 1.0), ('Megatron', None, 'None', 10, 5000000, 5.699999809265137, ['Megatron'], None, '1980/04/10', '2012/05/10', [None, 5700.0], datetime.date(2012, 5, 10), datetime.datetime(2014, 6, 24, 0, 0), True, 0.0, 0.0, 1.0), ('ironhide&', 26, 'Security', 7, 5000000, 4.0, ['Roadbuster'], '37.789563,-122.400356', '1980/04/10', '2014/07/10', [7.924799919128418, 4000.0], datetime.date(2014, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True, 1.0, 0.0, 0.0), ('Jazz', 13, 'First Lieutenant', 8, 5000000, 1.7999999523162842, ['Meister'], '33.670666,-117.841553', '1980/04/10', '2013/06/10', [3.962399959564209, 1800.0], datetime.date(2013, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True, 0.0, 1.0, 0.0), ('bumbl#ebéé ', 17, 'Espionage', 7, 5000000, 2.0, ['Bumble', 'Goldback'], '10.642707,-71.612534', '1980/04/10', '2015/08/10', [5.334000110626221, 2000.0], datetime.date(2015, 8, 10), datetime.datetime(2014, 6, 24, 0, 0), True, 1.0, 0.0, 0.0)])
assert (expected_df.collect() == actual_df.collect())
@staticmethod
def test_cols_value_counts():
actual_df =source_df.cols.value_counts('height(ft)')
actual_df =json_enconding(actual_df)
expected_value =json_enconding({'height(ft)': [{'value': 300, 'count': 1}, {'value': 26, 'count': 1}, {'value': 13, 'count': 1}, {'value': 17, 'count': 1}, {'value': -28, 'count': 1}, {'value': None, 'count': 2}]})
assert(expected_value == actual_df)
@staticmethod
def test_cols_value_counts_all_columns():
actual_df =source_df.cols.value_counts('*')
actual_df =json_enconding(actual_df)
expected_value =json_enconding({'names': [{'value': 'Jazz', 'count': 1}, {'value': None, 'count': 1}, {'value': 'bumbl#ebéé ', 'count': 1}, {'value': 'ironhide&', 'count': 1}, {'value': "Optim'us", 'count': 1}, {'value': 'Megatron', 'count': 1}, {'value': 'Metroplex_)^$', 'count': 1}], 'height(ft)': [{'value': 300, 'count': 1}, {'value': 26, 'count': 1}, {'value': 13, 'count': 1}, {'value': 17, 'count': 1}, {'value': -28, 'count': 1}, {'value': None, 'count': 2}], 'function': [{'value': 'Leader', 'count': 1}, {'value': 'First Lieutenant', 'count': 1}, {'value': 'None', 'count': 1}, {'value': 'Security', 'count': 1}, {'value': None, 'count': 1}, {'value': 'Espionage', 'count': 1}, {'value': 'Battle Station', 'count': 1}], 'rank': [{'value': None, 'count': 1}, {'value': 8, 'count': 2}, {'value': 7, 'count': 2}, {'value': 10, 'count': 2}], 'age': [{'value': None, 'count': 1}, {'value': 5000000, 'count': 6}], 'weight(t)': [{'value': 5.699999809265137, 'count': 1}, {'value': 2.0, 'count': 1}, {'value': 1.7999999523162842, 'count': 1}, {'value': 4.0, 'count': 1}, {'value': 4.300000190734863, 'count': 1}, {'value': None, 'count': 2}], 'japanese name': [{'value': ['Metroflex'], 'count': 1}, {'value': ['Bumble', 'Goldback'], 'count': 1}, {'value': None, 'count': 1}, {'value': ['Inochi', 'Convoy'], 'count': 1}, {'value': ['Megatron'], 'count': 1}, {'value': ['Meister'], 'count': 1}, {'value': ['Roadbuster'], 'count': 1}], 'last position seen': [{'value': '37.789563,-122.400356', 'count': 1}, {'value': '19.442735,-99.201111', 'count': 1}, {'value': '33.670666,-117.841553', 'count': 1}, {'value': '10.642707,-71.612534', 'count': 1}, {'value': None, 'count': 3}], 'date arrival': [{'value': None, 'count': 1}, {'value': '1980/04/10', 'count': 6}], 'last date seen': [{'value': '2011/04/10', 'count': 1}, {'value': None, 'count': 1}, {'value': '2012/05/10', 'count': 1}, {'value': '2013/06/10', 'count': 1}, {'value': '2015/08/10', 'count': 1}, {'value': '2014/07/10', 'count': 1}, {'value': '2016/09/10', 'count': 1}], 'attributes': [{'value': [3.962399959564209, 1800.0], 'count': 1}, {'value': [None, 5700.0], 'count': 1}, {'value': None, 'count': 1}, {'value': [8.53439998626709, 4300.0], 'count': 1}, {'value': [7.924799919128418, 4000.0], 'count': 1}, {'value': [91.44000244140625, None], 'count': 1}, {'value': [5.334000110626221, 2000.0], 'count': 1}], 'Date Type': [{'value': '2012-05-10', 'count': 1}, {'value': '2015-08-10', 'count': 1}, {'value': None, 'count': 1}, {'value': '2011-04-10', 'count': 1}, {'value': '2013-06-24', 'count': 1}, {'value': '2014-06-24', 'count': 1}, {'value': '2016-09-10', 'count': 1}], 'timestamp': [{'value': None, 'count': 1}, {'value': '2014-06-24 00:00:00', 'count': 6}], 'Cybertronian': [{'value': None, 'count': 1}, {'value': True, 'count': 6}], 'function(binary)': [{'value': None, 'count': 1}, {'value': None, 'count': 1}, {'value': None, 'count': 1}, {'value': None, 'count': 1}, {'value': None, 'count': 1}, {'value': None, 'count': 1}, {'value': None, 'count': 1}], 'NullType': [{'value': None, 'count': 7}]})
assert (expected_value == actual_df)
@staticmethod
def test_cols_variance():
actual_df =source_df.cols.variance('height(ft)')
actual_df =json_enconding(actual_df)
expected_value =json_enconding(17600.3)
assert (expected_value == actual_df)
@staticmethod
def test_cols_variance_all_columns():
actual_df =source_df.cols.variance('*')
actual_df =json_enconding(actual_df)
expected_value =json_enconding({'height(ft)': 17600.3, 'rank': 1.86667, 'age': 0.0, 'weight(t)': 2.713})
assert (expected_value == actual_df)
@staticmethod
def test_cols_z_score():
actual_df =source_df.cols.z_score('height(ft)')
expected_df = op.create.df([('names', StringType(), True),('height(ft)', DoubleType(), True),('function', StringType(), True),('rank', ByteType(), True),('age', IntegerType(), True),('weight(t)', FloatType(), True),('japanese name', ArrayType(StringType(),True), True),('last position seen', StringType(), True),('date arrival', StringType(), True),('last date seen', StringType(), True),('attributes', ArrayType(FloatType(),True), True),('Date Type', DateType(), True),('timestamp', TimestampType(), True),('Cybertronian', BooleanType(), True),('function(binary)', BinaryType(), True),('NullType', NullType(), True)], [("Optim'us", 0.7055305454022474, 'Leader', 10, 5000000, 4.300000190734863, ['Inochi', 'Convoy'], '19.442735,-99.201111', '1980/04/10', '2016/09/10', [8.53439998626709, 4300.0], datetime.date(2016, 9, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Leader'), None), ('bumbl#ebéé ', 0.366333167805013, 'Espionage', 7, 5000000, 2.0, ['Bumble', 'Goldback'], '10.642707,-71.612534', '1980/04/10', '2015/08/10', [5.334000110626221, 2000.0], datetime.date(2015, 8, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Espionage'), None), ('ironhide&', 0.29849369228556616, 'Security', 7, 5000000, 4.0, ['Roadbuster'], '37.789563,-122.400356', '1980/04/10', '2014/07/10', [7.924799919128418, 4000.0], datetime.date(2014, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Security'), None), ('Jazz', 0.39648404581365604, 'First Lieutenant', 8, 5000000, 1.7999999523162842, ['Meister'], '33.670666,-117.841553', '1980/04/10', '2013/06/10', [3.962399959564209, 1800.0], datetime.date(2013, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'First Lieutenant'), None), ('Megatron', None, 'None', 10, 5000000, 5.699999809265137, ['Megatron'], None, '1980/04/10', '2012/05/10', [None, 5700.0], datetime.date(2012, 5, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'None'), None), ('Metroplex_)^$', 1.7668414513064827, 'Battle Station', 8, 5000000, None, ['Metroflex'], None, '1980/04/10', '2011/04/10', [91.44000244140625, None], datetime.date(2011, 4, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Battle Station'), None), (None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None)])
assert (expected_df.collect() == actual_df.collect())
@staticmethod
def test_cols_z_score_all_columns():
actual_df =source_df.cols.z_score('*')
expected_df = op.create.df([('names', StringType(), True),('height(ft)', DoubleType(), True),('function', StringType(), True),('rank', DoubleType(), True),('age', DoubleType(), True),('weight(t)', DoubleType(), True),('japanese name', ArrayType(StringType(),True), True),('last position seen', StringType(), True),('date arrival', StringType(), True),('last date seen', StringType(), True),('attributes', ArrayType(FloatType(),True), True),('Date Type', DateType(), True),('timestamp', TimestampType(), True),('Cybertronian', BooleanType(), True),('function(binary)', BinaryType(), True),('NullType', NullType(), True)], [("Optim'us", 0.7055305454022474, 'Leader', 1.2198776221217045, None, 0.4492691429494289, ['Inochi', 'Convoy'], '19.442735,-99.201111', '1980/04/10', '2016/09/10', [8.53439998626709, 4300.0], datetime.date(2016, 9, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Leader'), None), ('bumbl#ebéé ', 0.366333167805013, 'Espionage', 0.9758977061467071, None, 0.9471076788576425, ['Bumble', 'Goldback'], '10.642707,-71.612534', '1980/04/10', '2015/08/10', [5.334000110626221, 2000.0], datetime.date(2015, 8, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Espionage'), None), ('ironhide&', 0.29849369228556616, 'Security', 0.9758977061467071, None, 0.2671329350624119, ['Roadbuster'], '37.789563,-122.400356', '1980/04/10', '2014/07/10', [7.924799919128418, 4000.0], datetime.date(2014, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Security'), None), ('Jazz', 0.39648404581365604, 'First Lieutenant', 0.24397259672390328, None, 1.0685317691994, ['Meister'], '33.670666,-117.841553', '1980/04/10', '2013/06/10', [3.962399959564209, 1800.0], datetime.date(2013, 6, 24), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'First Lieutenant'), None), ('Megatron', None, 'None', 1.2198776221217045, None, 1.2992373410954494, ['Megatron'], None, '1980/04/10', '2012/05/10', [None, 5700.0], datetime.date(2012, 5, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'None'), None), ('Metroplex_)^$', 1.7668414513064827, 'Battle Station', 0.24397259672390328, None, None, ['Metroflex'], None, '1980/04/10', '2011/04/10', [91.44000244140625, None], datetime.date(2011, 4, 10), datetime.datetime(2014, 6, 24, 0, 0), True, bytearray(b'Battle Station'), None), (None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None)])
assert (expected_df.collect() == actual_df.collect())