In [1364]:
# Import libraries
import pandas as pd
import numpy as np
import statsmodels.api as sm
import statsmodels.formula.api as smf
import os
from linearmodels.panel import PanelOLS
import matplotlib.pyplot as plt
import seaborn as sns

In [1365]:
os.chdir("/Users/cindyj./Desktop/Econ330Project")
df = pd.read_csv('CindyData.csv')

In [1366]:
df['GDPgr']=df.groupby('State')['GDP'].pct_change(fill_method=None)*100

In [1367]:
# Calculate GDP growth for individual states
df['GDP'] = df.groupby('State')['GDP'].pct_change(fill_method=None) * 100

# Drop missing values for GDP
df = df.dropna(subset=['GDP'])

In [1368]:
# Create a sequential quarter index using the lambda function
df['quarter_number'] = df['Quarter'].apply(lambda x: (int(x[:4]) - 2020) * 4 + int(x[-1]))

In [1369]:
df

Unnamed: 0,quarter1,year,State,residential_percent_change_from_baseline,Quarter,Country_Region,Cases,GDP,Population,Value,Unemployment Rate,GDPgr,quarter_number
51,1,2021,Alabama,6.237078,2021Q1,US,42056559,2.615065,5050380.0,15.197143,3.966667,2.615065,5
52,1,2021,Alaska,8.419162,2021Q1,US,5059288,-2.212857,734923.0,12.847222,7.000000,-2.212857,5
53,1,2021,Arizona,7.730354,2021Q1,US,68441397,5.273573,7272487.0,18.188889,6.200000,5.273573,5
54,1,2021,Arkansas,7.134058,2021Q1,US,27106939,5.205179,3028443.0,17.651429,4.800000,5.205179,5
55,1,2021,California,10.155331,2021Q1,US,305703103,3.203874,39145060.0,16.597222,8.566667,3.203874,5
...,...,...,...,...,...,...,...,...,...,...,...,...,...
607,4,2022,Virginia,3.770651,2022Q4,US,12546715,1.974091,8679099.0,2.530556,3.066667,1.974091,12
608,4,2022,Washington,1.300699,2022Q4,US,10910661,2.425289,7784477.0,4.436111,4.266667,2.425289,12
609,4,2022,West Virginia,1.872054,2022Q4,US,3607942,4.140287,1774035.0,1.513889,3.833333,4.140287,12
610,4,2022,Wisconsin,1.435310,2022Q4,US,11216076,0.228543,5890543.0,2.819444,2.800000,0.228543,12


In [1370]:
df = df.dropna(subset=['GDP', 'Cases'])

In [1371]:
df['log_Cases'] = np.log(df['Cases'] + 1)  # Add 1 to avoid log(0)

In [1372]:
df['log_Population'] = np.log(df['Population'])
df['log_Value'] = np.log(df['Value']+1)

  result = getattr(ufunc, method)(*inputs, **kwargs)


In [1373]:
df['UR'] = df['Unemployment Rate']

In [1374]:
print(df.columns)

Index(['quarter1', 'year', 'State', 'residential_percent_change_from_baseline',
       'Quarter', 'Country_Region', 'Cases', 'GDP', 'Population', 'Value',
       'Unemployment Rate', 'GDPgr', 'quarter_number', 'log_Cases',
       'log_Population', 'log_Value', 'UR'],
      dtype='object')


In [1375]:
# Reset the index to make 'State' and 'quarter_number' regular columns

df = df.reset_index()

# Now you can verify the columns
print(df.columns)  # 'State' and 'quarter_number' should now appear

# After performing necessary operations, set the multi-index again if needed
df = df.set_index(['State', 'quarter_number'])

# Confirm the index
print(df.index)


Index(['index', 'quarter1', 'year', 'State',
       'residential_percent_change_from_baseline', 'Quarter', 'Country_Region',
       'Cases', 'GDP', 'Population', 'Value', 'Unemployment Rate', 'GDPgr',
       'quarter_number', 'log_Cases', 'log_Population', 'log_Value', 'UR'],
      dtype='object')
MultiIndex([(             'Alabama',  5),
            (              'Alaska',  5),
            (             'Arizona',  5),
            (            'Arkansas',  5),
            (          'California',  5),
            (            'Colorado',  5),
            (         'Connecticut',  5),
            (            'Delaware',  5),
            ('District of Columbia',  5),
            (             'Florida',  5),
            ...
            (        'South Dakota', 12),
            (           'Tennessee', 12),
            (               'Texas', 12),
            (                'Utah', 12),
            (             'Vermont', 12),
            (            'Virginia', 12),
            (

log_case on GDP growth with entity fixed effect + time fixed effect

In [1377]:
df = df.dropna(subset=['GDP', 'Cases', 'log_Cases'])

In [1378]:
# Run the PanelOLS model
model = PanelOLS.from_formula('GDP ~ log_Cases', data=df)
results = model.fit(cov_type='clustered', cluster_entity=True)

# Print the results
print(results)

                          PanelOLS Estimation Summary                           
Dep. Variable:                    GDP   R-squared:                        0.0319
Estimator:                   PanelOLS   R-squared (Between):              0.6290
No. Observations:                 561   R-squared (Within):               0.0204
Date:                Tue, Dec 03 2024   R-squared (Overall):              0.0319
Time:                        22:09:15   Log-likelihood                   -1841.9
Cov. Estimator:             Clustered                                           
                                        F-statistic:                      18.435
Entities:                          51   P-value                           0.0000
Avg Obs:                       11.000   Distribution:                   F(1,560)
Min Obs:                       11.000                                           
Max Obs:                       11.000   F-statistic (robust):             249.99
                            

Run general regressions

In [1380]:
# Run the PanelOLS model
model = PanelOLS.from_formula('GDP ~ log_Cases + Population', data=df)
results = model.fit(cov_type='clustered', cluster_entity=True)

# Print the results
print(results)

                          PanelOLS Estimation Summary                           
Dep. Variable:                    GDP   R-squared:                        0.0337
Estimator:                   PanelOLS   R-squared (Between):              0.4880
No. Observations:                 561   R-squared (Within):               0.0250
Date:                Tue, Dec 03 2024   R-squared (Overall):              0.0337
Time:                        22:09:15   Log-likelihood                   -1841.3
Cov. Estimator:             Clustered                                           
                                        F-statistic:                      9.7574
Entities:                          51   P-value                           0.0001
Avg Obs:                       11.000   Distribution:                   F(2,559)
Min Obs:                       11.000                                           
Max Obs:                       11.000   F-statistic (robust):             96.030
                            

In [1381]:
# Run the PanelOLS model
model = PanelOLS.from_formula('GDP ~ log_Cases + log_Population', data=df)
results = model.fit(cov_type='clustered', cluster_entity=True)

# Print the results
print(results)

                          PanelOLS Estimation Summary                           
Dep. Variable:                    GDP   R-squared:                        0.4615
Estimator:                   PanelOLS   R-squared (Between):             -0.1348
No. Observations:                 561   R-squared (Within):               0.4730
Date:                Tue, Dec 03 2024   R-squared (Overall):              0.4615
Time:                        22:09:15   Log-likelihood                   -1677.3
Cov. Estimator:             Clustered                                           
                                        F-statistic:                      239.53
Entities:                          51   P-value                           0.0000
Avg Obs:                       11.000   Distribution:                   F(2,559)
Min Obs:                       11.000                                           
Max Obs:                       11.000   F-statistic (robust):             129.06
                            

In [1382]:
# Run the PanelOLS model
model = PanelOLS.from_formula('GDP ~ log_Cases + log_Population + log_Value', data=df)
results = model.fit(cov_type='clustered', cluster_entity=True)

# Print the results
print(results)

                          PanelOLS Estimation Summary                           
Dep. Variable:                    GDP   R-squared:                        0.6360
Estimator:                   PanelOLS   R-squared (Between):              0.7655
No. Observations:                 492   R-squared (Within):               0.6078
Date:                Tue, Dec 03 2024   R-squared (Overall):              0.6360
Time:                        22:09:15   Log-likelihood                   -1299.6
Cov. Estimator:             Clustered                                           
                                        F-statistic:                      284.77
Entities:                          50   P-value                           0.0000
Avg Obs:                       9.8400   Distribution:                   F(3,489)
Min Obs:                       8.0000                                           
Max Obs:                      10.0000   F-statistic (robust):             113.18
                            

Inputs contain missing values. Dropping rows with missing observations.
  super().__init__(dependent, exog, weights=weights, check_rank=check_rank)


In [1383]:
# Run the PanelOLS model
model = PanelOLS.from_formula('GDP ~ log_Cases + log_Population + log_Value + residential_percent_change_from_baseline', data=df)
results = model.fit(cov_type='clustered', cluster_entity=True)

# Print the results
print(results)

Inputs contain missing values. Dropping rows with missing observations.
  super().__init__(dependent, exog, weights=weights, check_rank=check_rank)


                          PanelOLS Estimation Summary                           
Dep. Variable:                    GDP   R-squared:                        0.6911
Estimator:                   PanelOLS   R-squared (Between):              0.7120
No. Observations:                 492   R-squared (Within):               0.6869
Date:                Tue, Dec 03 2024   R-squared (Overall):              0.6911
Time:                        22:09:15   Log-likelihood                   -1259.2
Cov. Estimator:             Clustered                                           
                                        F-statistic:                      272.98
Entities:                          50   P-value                           0.0000
Avg Obs:                       9.8400   Distribution:                   F(4,488)
Min Obs:                       8.0000                                           
Max Obs:                      10.0000   F-statistic (robust):             90.930
                            

In [1384]:
# Run the PanelOLS model
model = PanelOLS.from_formula('GDP ~ log_Cases + log_Population + log_Value + residential_percent_change_from_baseline + UR', data=df)
results = model.fit(cov_type='clustered', cluster_entity=True)

# Print the results
print(results)

                          PanelOLS Estimation Summary                           
Dep. Variable:                    GDP   R-squared:                        0.6996
Estimator:                   PanelOLS   R-squared (Between):              0.6999
No. Observations:                 492   R-squared (Within):               0.6998
Date:                Tue, Dec 03 2024   R-squared (Overall):              0.6996
Time:                        22:09:15   Log-likelihood                   -1252.3
Cov. Estimator:             Clustered                                           
                                        F-statistic:                      226.86
Entities:                          50   P-value                           0.0000
Avg Obs:                       9.8400   Distribution:                   F(5,487)
Min Obs:                       8.0000                                           
Max Obs:                      10.0000   F-statistic (robust):             81.431
                            

Inputs contain missing values. Dropping rows with missing observations.
  super().__init__(dependent, exog, weights=weights, check_rank=check_rank)


In [1385]:
# Run the PanelOLS model
model = PanelOLS.from_formula('GDP ~ log_Cases + log_Population + log_Value + residential_percent_change_from_baseline + UR + EntityEffects', data=df)
results = model.fit(cov_type='clustered', cluster_entity=True)

# Print the results
print(results)

                          PanelOLS Estimation Summary                           
Dep. Variable:                    GDP   R-squared:                        0.7235
Estimator:                   PanelOLS   R-squared (Between):          -5.307e+05
No. Observations:                 492   R-squared (Within):               0.7235
Date:                Tue, Dec 03 2024   R-squared (Overall):          -9.713e+04
Time:                        22:09:15   Log-likelihood                   -1183.2
Cov. Estimator:             Clustered                                           
                                        F-statistic:                      228.72
Entities:                          50   P-value                           0.0000
Avg Obs:                       9.8400   Distribution:                   F(5,437)
Min Obs:                       8.0000                                           
Max Obs:                      10.0000   F-statistic (robust):             75.252
                            

Inputs contain missing values. Dropping rows with missing observations.
  super().__init__(dependent, exog, weights=weights, check_rank=check_rank)


In [1386]:
# Run the PanelOLS model
model = PanelOLS.from_formula('GDP ~ log_Cases + log_Population + log_Value + residential_percent_change_from_baseline + UR + EntityEffects + TimeEffects', data=df)
results = model.fit(cov_type='clustered', cluster_entity=True)

# Print the results
print(results)

Inputs contain missing values. Dropping rows with missing observations.
  super().__init__(dependent, exog, weights=weights, check_rank=check_rank)


                          PanelOLS Estimation Summary                           
Dep. Variable:                    GDP   R-squared:                        0.1887
Estimator:                   PanelOLS   R-squared (Between):          -8.369e+05
No. Observations:                 492   R-squared (Within):               0.3166
Date:                Tue, Dec 03 2024   R-squared (Overall):          -1.532e+05
Time:                        22:09:15   Log-likelihood                   -1021.8
Cov. Estimator:             Clustered                                           
                                        F-statistic:                      19.912
Entities:                          50   P-value                           0.0000
Avg Obs:                       9.8400   Distribution:                   F(5,428)
Min Obs:                       8.0000                                           
Max Obs:                      10.0000   F-statistic (robust):             10.395
                            

In [1387]:
df.describe()

Unnamed: 0,index,quarter1,year,residential_percent_change_from_baseline,Cases,GDP,Population,Value,Unemployment Rate,GDPgr,log_Cases,log_Population,log_Value,UR
count,561.0,561.0,561.0,561.0,561.0,561.0,561.0,550.0,561.0,561.0,561.0,561.0,492.0,561.0
mean,331.0,2.636364,2021.090909,5.452425,67925780.0,0.794287,6516689.0,10.304315,5.335175,0.794287,16.884808,15.18629,2.228814,5.335175
std,162.091024,1.068893,0.793234,3.362533,113024600.0,6.513888,7342979.0,15.487026,2.945392,6.513888,1.802614,1.033539,0.924729,2.945392
min,51.0,1.0,2020.0,-0.490196,44323.0,-20.395731,577664.0,-19.364516,1.933333,-20.395731,10.699282,13.266748,-2.330756,1.933333
25%,191.0,2.0,2020.0,3.011655,7510189.0,-2.889494,1791562.0,3.102083,3.333333,-2.889494,15.831771,14.398598,1.714426,3.333333
50%,331.0,3.0,2021.0,4.703831,27426140.0,2.137209,4508155.0,7.669048,4.333333,2.137209,17.127007,15.321399,2.287188,4.333333
75%,471.0,4.0,2022.0,7.043174,79281020.0,4.165498,7724566.0,15.472222,6.5,4.165498,18.188509,15.859916,2.824186,6.5
max,611.0,4.0,2022.0,21.483516,998719700.0,21.135568,39503200.0,193.341935,24.733333,21.135568,20.721985,17.491892,5.269619,24.733333
