In [1]:
import pandas as pd
import os
import matplotlib.pyplot as plt
import numpy as np
import statsmodels.api as sm
import warnings

In [22]:
def OLS(df, geog, col, alpha):
    
    """Finds linear coef for increase in stat by a given geography from 1983 - 2016, as well
    as the pct change in population of the cities within the given geography
    
    NOTE 2020.03.01 - This will throw a run time warning if all values of a col are zero (e.g. can regress
    a bunch of zeros) ... See note in run_OLS. CPT 
    
    NOTE 2020.03.01 - Later in the day this issue is resolved by removing the offending cities. See comments
    in code. CPT
    
    NOTE 2021.07.23 - Fixeded all OLS errors. Key is to drop cities in 1983 that didn't have a population. We
    don't want them in the dataset anyway. 
    
    
    Args:
        df = HI stats dataframe
        geog = subset geography to calc people days regression
        col = col to regress on 
        alpha = ci alpha for coef
    """

    # Get results
    labels = []
    coef_list = []
    p_list = []
    df_out = pd.DataFrame()
    
    # turn warnings on
    warnings.filterwarnings("error")

    for label, df_geog in df.groupby(geog):
        print(label)

        # Get Data
        X_year = np.array(df_geog.groupby('year')['ID_HDC_G0'].mean().index).reshape((-1, 1))
        Y_stats = np.array(df_geog.groupby('year')[col].sum()).reshape((-1, 1))

        # Add Intercept
        X_year_2 = sm.add_constant(X_year)

        # Regress
        try:
            model = sm.OLS(Y_stats, X_year_2).fit()
        except RuntimeWarning:
            break
        
        # Get slope
        # first param in intercept coef, second is slope of line but if slope = 0, then intecept
        if len(model.params) == 2:
            coef = model.params[1]
            
        else:
            coef = model.params[0]
        
        #P value
        # deal with zero slope models
        if (model.params[0] == 0) & (model.params[1] == 0):
            p = np.nan
        else:
            p = model.pvalues[0]

        # Make lists
        labels.append(label)

        coef_list.append(coef)
        p_list.append(p)

    # Make data frame
    df_out[geog] = labels

    df_out['coef'] = coef_list
    df_out['p_value'] = [round(elem, 4) for elem in p_list]

    return df_out

# Check TREND

In [3]:
fn = '/scratch/cascade/UEH-daily/stats/wbgtmax30_EXP.json'
exp = pd.read_json(fn, orient = 'split')
exp.head()

Unnamed: 0,ID_HDC_G0,year,tot_days,P,P1983,P2016,people_days,people_days_heat,people_days_pop
0,18,2006,2,622283.428656,494664.495616,661553.577433,1244567.0,989328.991233,255237.866079
1,22,2006,2,70915.650551,52064.452435,73006.671133,141831.3,104128.904869,37702.396233
2,26,2006,3,247944.032154,194088.886834,268055.635628,743832.1,582266.660503,161565.435959
3,27,2006,2,90805.062622,80540.77994,93335.494324,181610.1,161081.55988,20528.565365
4,28,1998,1,68062.052369,59320.971209,91449.606255,68062.05,59320.971209,8741.08116


In [24]:
in_col = 'people_days_pop'
pop_trend = OLS(df = exp, geog = 'ID_HDC_G0', col = in_col, alpha = 0.05)

18
22
26
27
28
29
30
33
34
35
36
38
39
40
42
43
44
46
47
48
49
51
52
53
55
56
57
58
59
60
61
62
63
64
65
66
68
70
71
72
73
74
75
76
77
79
80
81
83
84
85
92
94
99
117
122
126
130
163
188
189
198
200
201
208
209
210
212
213
215
217
219
220
221
222
223
224
225
226
227
228
230
231
232
233
234
235
236
237
238
239
240
243
244
245
246
248
250
251
253
255
264
273
289
290
296
309
313
314
315
318
319
320
322
323
324
325
326
327
328
329
331
332
333
334
336
338
339
340
341
342
344
345
346
347
349
350
352
353
354
355
356
357
358
360
363
364
366
367
368
371
372
373
374
375
378
379
380
381
382
383
384
385
386
387
388
390
391
393
394
395
396
397
399
400
401
402
405
406
407
408
409
410
411
413
417
421
422
423
424
427
429
431
432
433
434
436
438
439
440
441
442
443
444
446
447
448
449
450
452
460
469
471
473
474
476
481
482
483
484
486
487
490
491
493
497
500
501
502
503
507
508
509
510
513
515
517
518
519
520
521
523
525
527
530
532
534
535
536
537
541
545
546
548
550
551
553
554
556
559
563
564
566
56

3250
3251
3256
3257
3261
3262
3263
3264
3266
3270
3271
3272
3277
3278
3279
3281
3283
3284
3291
3296
3304
3305
3306
3308
3317
3319
3320
3322
3326
3330
3341
3345
3347
3348
3352
3353
3356
3358
3360
3361
3363
3364
3365
3368
3369
3371
3372
3377
3379
3384
3386
3388
3390
3393
3397
3398
3403
3404
3406
3407
3412
3414
3416
3418
3419
3420
3422
3424
3428
3430
3433
3440
3441
3443
3444
3446
3448
3454
3456
3457
3459
3463
3464
3470
3471
3472
3473
3475
3478
3480
3481
3482
3483
3484
3486
3487
3488
3489
3491
3493
3494
3495
3497
3500
3503
3507
3514
3515
3519
3521
3523
3526
3528
3542
3545
3547
3550
3551
3552
3553
3555
3557
3566
3567
3570
3575
3577
3578
3579
3583
3589
3590
3591
3593
3596
3601
3602
3614
3615
3618
3619
3620
3621
3622
3625
3629
3632
3633
3634
3636
3639
3642
3647
3648
3650
3658
3659
3660
3663
3664
3685
3691
3692
3695
3703
3705
3710
3711
3713
3724
3726
3727
3729
3730
3732
3740
3741
3742
3745
3747
3748
3750
3751
3752
3760
3765
3767
3773
3775
3778
3783
3784
3787
3789
3790
3793
3796
3800
3804
3806


6720
6721
6722
6723
6724
6726
6727
6728
6729
6730
6732
6733
6734
6735
6736
6737
6739
6740
6743
6744
6745
6746
6747
6748
6749
6750
6751
6752
6753
6754
6755
6756
6757
6758
6759
6760
6761
6762
6763
6764
6765
6766
6767
6768
6769
6770
6771
6772
6773
6774
6775
6776
6777
6778
6779
6780
6783
6784
6785
6786
6787
6788
6789
6790
6791
6792
6794
6795
6796
6797
6798
6799
6800
6801
6802
6805
6806
6807
6808
6809
6810
6811
6812
6813
6814
6815
6816
6817
6818
6819
6820
6821
6822
6824
6825
6826
6827
6828
6829
6831
6832
6833
6834
6835
6836
6837
6838
6839
6840
6841
6842
6843
6844
6845
6846
6847
6848
6849
6850
6851
6852
6853
6854
6855
6856
6857
6858
6859
6860
6862
6863
6864
6865
6866
6867
6868
6869
6870
6871
6872
6873
6874
6875
6877
6878
6879
6880
6881
6882
6883
6884
6885
6886
6887
6888
6889
6890
6891
6892
6893
6894
6896
6897
6898
6899
6900
6901
6903
6904
6905
6906
6907
6908
6909
6910
6911
6912
6913
6914
6915
6916
6917
6918
6919
6920
6921
6922
6923
6924
6925
6926
6927
6928
6929
6930
6931
6932
6933
6934
6935


8585
8586
8587
8588
8589
8590
8591
8592
8593
8594
8595
8596
8597
8598
8599
8600
8601
8602
8603
8604
8605
8606
8607
8608
8609
8610
8611
8612
8613
8614
8615
8616
8617
8618
8619
8620
8621
8622
8623
8624
8625
8626
8627
8628
8629
8630
8631
8632
8633
8634
8635
8636
8637
8638
8639
8640
8641
8642
8643
8644
8645
8646
8647
8648
8649
8650
8651
8652
8653
8654
8655
8656
8657
8658
8659
8660
8661
8662
8663
8664
8665
8666
8667
8668
8669
8670
8671
8672
8673
8674
8675
8676
8677
8678
8679
8680
8681
8682
8683
8684
8685
8686
8687
8688
8689
8690
8691
8693
8694
8695
8696
8697
8698
8699
8700
8701
8702
8703
8704
8705
8706
8707
8708
8709
8710
8711
8712
8713
8714
8715
8716
8717
8718
8719
8720
8721
8722
8723
8724
8725
8726
8727
8728
8729
8730
8731
8732
8733
8734
8735
8736
8737
8738
8739
8740
8742
8743
8744
8745
8747
8748
8749
8750
8751
8752
8753
8754
8755
8756
8757
8758
8760
8761
8762
8763
8764
8765
8766
8767
8768
8769
8770
8771
8772
8773
8774
8775
8776
8777
8778
8779
8780
8781
8782
8783
8784
8785
8786
8787
8788


10450
10452
10453
10455
10457
10462
10463
10464
10465
10466
10468
10469
10471
10472
10474
10479
10480
10481
10482
10483
10486
10487
10489
10490
10491
10492
10494
10495
10496
10497
10498
10499
10500
10502
10506
10507
10508
10509
10510
10511
10512
10513
10514
10515
10516
10517
10518
10519
10520
10521
10522
10524
10527
10528
10529
10533
10536
10540
10541
10542
10543
10544
10546
10548
10550
10551
10552
10554
10555
10556
10558
10560
10561
10563
10566
10567
10571
10575
10577
10580
10581
10582
10583
10584
10585
10586
10588
10590
10591
10594
10595
10596
10597
10599
10600
10602
10603
10604
10605
10606
10607
10608
10609
10610
10613
10614
10615
10616
10617
10618
10619
10620
10627
10628
10629
10630
10631
10633
10634
10636
10639
10640
10641
10642
10643
10646
10649
10652
10653
10654
10655
10656
10658
10660
10661
10662
10664
10665
10668
10671
10672
10673
10674
10675
10680
10684
10685
10686
10687
10688
10689
10690
10691
10692
10693
10694
10695
10696
10697
10698
10699
10700
10702
10704
10705
10706
1070

12171
12172
12173
12174
12175
12176
12177
12178
12180
12181
12182
12183
12185
12186
12187
12188
12189
12190
12191
12192
12193
12194
12195
12196
12197
12198
12200
12201
12203
12205
12206
12207
12208
12209
12210
12211
12215
12216
12217
12218
12219
12220
12222
12223
12224
12225
12226
12228
12229
12231
12232
12234
12235
12236
12237
12238
12239
12240
12242
12243
12244
12245
12246
12247
12248
12249
12250
12252
12253
12254
12255
12256
12258
12259
12260
12261
12262
12263
12267
12268
12269
12270
12272
12274
12275
12276
12277
12278
12279
12280
12281
12282
12284
12285
12286
12288
12289
12290
12291
12292
12293
12294
12296
12297
12298
12299
12300
12301
12303
12306
12308
12309
12311
12312
12313
12315
12316
12317
12318
12319
12320
12322
12324
12325
12326
12327
12328
12329
12330
12331
12332
12333
12334
12336
12337
12338
12339
12340
12341
12342
12343
12344
12345
12346
12347
12348
12349
12350
12351
12352
12353
12354
12355
12356
12357
12358
12359
12360
12361
12362
12363
12364
12366
12367
12368
12369
1237

In [26]:
pop_trend[np.isnan(pop_trend['p_value'])]

Unnamed: 0,ID_HDC_G0,coef,p_value
382,733,0.0,
457,872,0.0,
1469,2656,0.0,
1895,3486,0.0,
2881,5974,0.0,
2911,6014,0.0,
2948,6091,0.0,
2949,6106,0.0,
2977,6181,0.0,
3353,6621,0.0,


# what's up with 733

In [30]:
error = 3486
test = exp[exp['ID_HDC_G0'] == error]
test

Unnamed: 0,ID_HDC_G0,year,tot_days,P,P1983,P2016,people_days,people_days_heat,people_days_pop
39291,3486,1983,1,195485.086889,195485.086889,996322.038391,195485.086889,195485.086889,0.0
25173,3486,1984,0,,195485.086889,996322.038391,0.0,0.0,0.0
25174,3486,1985,0,,195485.086889,996322.038391,0.0,0.0,0.0
25175,3486,1986,0,,195485.086889,996322.038391,0.0,0.0,0.0
25176,3486,1987,0,,195485.086889,996322.038391,0.0,0.0,0.0
25177,3486,1988,0,,195485.086889,996322.038391,0.0,0.0,0.0
25178,3486,1989,0,,195485.086889,996322.038391,0.0,0.0,0.0
25179,3486,1990,0,,195485.086889,996322.038391,0.0,0.0,0.0
25180,3486,1991,0,,195485.086889,996322.038391,0.0,0.0,0.0
25181,3486,1992,0,,195485.086889,996322.038391,0.0,0.0,0.0


In [23]:
in_col = 'people_days_pop'
OLS(df = test, geog = 'ID_HDC_G0', col = in_col, alpha = 0.05)

733


Unnamed: 0,ID_HDC_G0,coef,p_value
0,733,0.0,


In [11]:
df_geog = test
col = in_col

# Get Data
X_year = np.array(df_geog.groupby('year')['ID_HDC_G0'].mean().index).reshape((-1, 1))
Y_stats = np.array(df_geog.groupby('year')[col].sum()).reshape((-1, 1))

# Add Intercept
X_year_2 = sm.add_constant(X_year)

# Regress
model = sm.OLS(Y_stats, X_year_2).fit()

In [18]:
model.params

array([0., 0.])

In [None]:
zero_check = list(set(list(test['tot_days'].values)))

In [None]:
if len(zero_check) == 2:
    print(zero_check[0] * zero_check[1] == 0)