In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

In [12]:
# Read the shared state-level table and turn the date column into datetimes.
df = pd.read_csv("../../../SharedData/total_state_data.csv")
df["date"] = pd.to_datetime(df["date"])

# Keep rows whose year is 1995 through 2019 (both ends inclusive).
df = df[df["date"].dt.year.between(1995, 2019)]

# Columns kept for the rest of the notebook, in display order.
mo_features = [
    'state',
    'date',
    'monthly_emissions',
    'monthly_population',
    'prcp',
    'snow',
    'tavg',
    'gdp_rel_2017_interp',
    'monthly_energy_prod',
    'monthly_energy_use',
    'monthly_energy_flow',
    'monthly_num_plants',
    'monthly_energy_renew',
    'monthly_energy_fossil',
    'monthly_energy_total',
    'monthly_renew_pct',
    'monthly_fossil_pct',
    'state_sq_ft',
    'monthly_pop_density',
]
df = df[mo_features]

# Print per-column non-null counts and dtypes for the trimmed frame.
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 14700 entries, 2940 to 17639
Data columns (total 19 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   state                  14700 non-null  object        
 1   date                   14700 non-null  datetime64[ns]
 2   monthly_emissions      14700 non-null  float64       
 3   monthly_population     14400 non-null  float64       
 4   prcp                   14334 non-null  float64       
 5   snow                   14334 non-null  float64       
 6   tavg                   14334 non-null  float64       
 7   gdp_rel_2017_interp    12720 non-null  float64       
 8   monthly_energy_prod    14400 non-null  float64       
 9   monthly_energy_use     14400 non-null  float64       
 10  monthly_energy_flow    14400 non-null  float64       
 11  monthly_num_plants     13493 non-null  float64       
 12  monthly_energy_renew   14400 non-null  float64       
 13  mon

In [39]:
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import Ridge

# ny_emissions_train, ny_emissions_test = train_test_split(ny_emissions, test_size=0.2, shuffle=True)

In [45]:
# Delaware rows only. Sort by date explicitly so the positional split below is
# a chronological holdout instead of silently depending on the CSV's row order.
# reset_index() keeps the original row labels in an extra "index" column.
df_de = df[df.state == "DE"].sort_values("date", kind="stable").reset_index()
# Drop every row that has a missing value in any column.
df_de = df_de.dropna()

# Fraction of rows (the earliest ones) used for training; the remainder is the
# held-out test set.
train_size = 0.8
# Compute the cut position once so the two slices can never disagree.
split_at = int(len(df_de) * train_size)
df_de_train = df_de.iloc[:split_at]
df_de_test = df_de.iloc[split_at:]

In [46]:
# Column the regression is fitted to predict.
regr_predict = 'monthly_emissions'

# Predictor columns, in the order their coefficients are reported.
# NOTE(review): the recorded ridge output shows a coefficient of 0.0 for the
# 15th predictor ('state_sq_ft') at every alpha -- presumably it is constant
# within a single state; confirm whether it should stay in this list.
regr_features = [
    'monthly_population',
    'prcp',
    'snow',
    'tavg',
    'gdp_rel_2017_interp',
    'monthly_energy_prod',
    'monthly_energy_use',
    'monthly_energy_flow',
    'monthly_num_plants',
    'monthly_energy_renew',
    'monthly_energy_fossil',
    'monthly_energy_total',
    'monthly_renew_pct',
    'monthly_fossil_pct',
    'state_sq_ft',
    'monthly_pop_density',
]

In [47]:
# Ridge penalty strengths to sweep over.
alpha = [0.00001, 0.0001, 0.001, 0.01, 0.1, 1, 10, 100, 1000]

# Number of predictors, i.e. one ridge coefficient per entry of regr_features.
n = len(regr_features)

# Coefficient table: row i receives the fitted coefficients for alpha[i].
ridge_coefs = np.empty((len(alpha), n))

for i, penalty in enumerate(alpha):
    # Two-step pipeline: standardize the predictors, then fit ridge regression
    # with the current penalty.
    ridge_pipe = Pipeline(steps=[
        ('scale', StandardScaler()),
        ('ridge', Ridge(alpha=penalty, max_iter=5000000)),
    ])

    # Fit on the training rows only.
    ridge_pipe.fit(df_de_train[regr_features], df_de_train[regr_predict])

    # Store this fit's coefficients; they apply to the standardized predictors
    # because the ridge step only ever sees the scaler's output.
    ridge_coefs[i, :] = ridge_pipe.named_steps['ridge'].coef_

In [None]:
print("Ridge Coefficients")

# One row per alpha and one column per coefficient (b_1 ... b_n, in the order
# of the fitted predictors), rounded to 8 decimals for display.
pd.DataFrame(
    ridge_coefs.round(8),
    columns=[f"b_{k}" for k in range(1, n + 1)],
    index=[f"alpha={a}" for a in alpha],
)

Ridge Coefficients


Unnamed: 0,b_1,b_2,b_3,b_4,b_5,b_6,b_7,b_8,b_9,b_10,b_11,b_12,b_13,b_14,b_15,b_16
alpha=1e-05,-181927.017345,-6928.589764,37618.290518,30112.749789,-54171.587144,21361.762906,19228.30316,-16523.333585,40131.909388,486.479685,-882043.917774,936344.220576,1992.024435,191160.242159,0.0,-150159.547368
alpha=0.0001,-176048.823499,-6934.156728,37611.343352,30098.909085,-54156.45098,21171.086374,19280.745713,-16587.243698,40149.51688,-121.42259,-877986.15379,932089.140472,2280.414523,190251.147028,0.0,-144649.173834
alpha=0.001,-126059.232796,-6988.798674,37548.288267,29978.002719,-54054.141691,19548.831926,19741.866522,-17145.0692,40304.880678,-5606.726207,-841045.852842,893374.256694,5042.705008,181936.475663,0.0,-97807.214179
alpha=0.01,42349.215179,-7436.485297,37189.92338,29456.629091,-54639.889708,14067.196381,21836.066021,-19535.704111,41028.632037,-34531.801934,-629837.389281,672726.795714,24587.191472,133105.984263,0.0,59305.537052
alpha=0.1,45201.909012,-8814.908861,36338.555603,28897.71435,-58704.063584,14143.084104,24502.148624,-22045.187684,41992.217737,-59946.537402,-206640.330263,232134.958891,48830.160799,31478.910723,0.0,58915.328652
alpha=1,-129.760438,-9491.498388,35439.781073,28623.635043,-58005.458219,15718.913084,24979.268094,-22376.028281,42087.413998,-19810.543036,-23566.375217,41086.067302,10359.975837,-12893.976363,0.0,17055.298525
alpha=10,-11563.972195,-8321.89785,32523.010831,27530.053988,-43957.088356,10058.566109,21044.498433,-19092.358201,39749.147753,-7141.962176,3284.610557,13629.862411,-3975.843759,-15717.801018,0.0,14462.856159
alpha=100,-13248.174899,-3543.489206,18435.158584,20303.103527,-19232.28844,-7530.873508,13360.049446,-13174.146894,27912.332721,-7894.423852,5055.029937,6835.815437,-7858.982591,-4743.080416,0.0,13656.058318
alpha=1000,-7137.915959,230.951352,2862.368869,6157.301693,-6273.722753,-7296.798876,7108.700463,-7258.799223,8583.578409,-5438.209811,1608.258354,1696.24088,-5530.268582,59.396396,0.0,7015.489738
