In [385]:
# Import libraries and load dataset
import os
import sys
import importlib
module_path = os.path.abspath(os.path.join('../..'))
if module_path not in sys.path:
    sys.path.append(module_path)

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import statsmodels.api as sm
import linearmodels as lm
import linearmodels.panel as lmp

import common.common as common
importlib.reload(common)

df = pd.read_csv("Data.csv")
yr = pd.Categorical(df.yr)
df = df.set_index(['country', 'yr'])
df["yr"] = yr
df

Unnamed: 0_level_0,Unnamed: 1_level_0,lat,long,inbtou_volume,inbtou_volume_ov,inbtou_volume_sd,inbtou_volume_air,inbtou_volume_cruise,inbtou_spending,inbtou_spending_travel,inbtou_spending_transport,covid19_cases,covid19_deaths,covid19_mortality,yr
country,yr,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
antigua_and_barbuda,2010,17.0608,-61.7964,8.128600e+05,,,,,568930500.0,,,0,0,0.000000,2010
antigua_and_barbuda,2011,17.0608,-61.7964,8.702400e+05,,,,,598619300.0,,,0,0,0.000000,2011
antigua_and_barbuda,2012,17.0608,-61.7964,8.426900e+05,,,,,666070300.0,,,0,0,0.000000,2012
antigua_and_barbuda,2013,17.0608,-61.7964,8.055500e+05,,,,,658851600.0,,,0,0,0.000000,2013
antigua_and_barbuda,2014,17.0608,-61.7964,7.925900e+05,,,,,595796200.0,,,0,0,0.000000,2014
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
us_virgin_islands,2018,18.3358,-64.8963,1.922000e+09,,,,,,,,0,0,0.000000,2018
us_virgin_islands,2019,18.3358,-64.8963,2.074000e+09,,,,,,,,0,0,0.000000,2019
us_virgin_islands,2020,18.3358,-64.8963,8.620000e+08,,,,,,,,2036,23,0.011297,2020
us_virgin_islands,2021,18.3358,-64.8963,,,,,,,,,7612,66,0.008671,2021


In [422]:
def pooled(
    df,
    y_name:    str,
    x_vars:    list[str] = ["covid19_cases", "covid19_deaths"],
    incl_chrs: bool = False,
):
    if incl_chrs:
        x_vars += ["lat", "long"]
    df = df[[y_name] + x_vars].dropna()

    return lm.PooledOLS(df[y_name], sm.add_constant(df[x_vars])).fit(cov_type="robust")

print(pooled_ols(df, "inbtou_volume"))

                          PooledOLS Estimation Summary                          
Dep. Variable:          inbtou_volume   R-squared:                        0.0012
Estimator:                  PooledOLS   R-squared (Between):              0.0023
No. Observations:                 219   R-squared (Within):              -0.0171
Date:                Wed, Apr 19 2023   R-squared (Overall):              0.0012
Time:                        10:03:26   Log-likelihood                   -4705.3
Cov. Estimator:                Robust                                           
                                        F-statistic:                      0.1248
Entities:                          24   P-value                           0.8827
Avg Obs:                       9.1250   Distribution:                   F(2,216)
Min Obs:                       0.0000                                           
Max Obs:                       13.000   F-statistic (robust):             3.7876
                            

In [413]:
def random_effects(
    df,
    y_name:    str,
    x_vars:    list[str] = ["covid19_cases", "covid19_deaths"],
    incl_chrs: bool = True,
):
    if incl_chrs:
        x_vars += ["lat", "long"]
    df = df[[y_name] + x_vars].dropna()

    return lm.RandomEffects(df[y_name], sm.add_constant(df[x_vars])).fit(cov_type="robust")

print(random_effects(df, "inbtou_volume"))

                        RandomEffects Estimation Summary                        
Dep. Variable:          inbtou_volume   R-squared:                        0.0002
Estimator:              RandomEffects   R-squared (Between):              0.0001
No. Observations:                 219   R-squared (Within):            4.701e-05
Date:                Wed, Apr 19 2023   R-squared (Overall):           7.565e-06
Time:                        10:01:39   Log-likelihood                   -4399.0
Cov. Estimator:                Robust                                           
                                        F-statistic:                      0.0269
Entities:                          24   P-value                           0.9735
Avg Obs:                       9.1250   Distribution:                   F(2,216)
Min Obs:                       0.0000                                           
Max Obs:                       13.000   F-statistic (robust):             1.8613
                            

In [414]:
def fixed_effects(
    df,
    y_name:    str,
    x_vars:    list[str] = ["covid19_cases", "covid19_deaths"],
    incl_chrs: bool = False,
    clustered: bool = False,
):
    if incl_chrs:
        x_vars += ["lat", "long"]
    df = df[[y_name] + x_vars].dropna()

    return (
        lm.PanelOLS(df[y_name], sm.add_constant(df[x_vars])).fit(cov_type="robust")
        if not clustered
        else lm.PanelOLS(df[y_name], sm.add_constant(df[x_vars]), entity_effects=True).fit(
            cov_type="clustered",
            cluster_entity=True,
        )
    )

print(fixed_effects(df, "inbtou_volume"))

                          PanelOLS Estimation Summary                           
Dep. Variable:          inbtou_volume   R-squared:                        0.0012
Estimator:                   PanelOLS   R-squared (Between):              0.0023
No. Observations:                 219   R-squared (Within):              -0.0171
Date:                Wed, Apr 19 2023   R-squared (Overall):              0.0012
Time:                        10:01:41   Log-likelihood                   -4705.3
Cov. Estimator:                Robust                                           
                                        F-statistic:                      0.1248
Entities:                          24   P-value                           0.8827
Avg Obs:                       9.1250   Distribution:                   F(2,216)
Min Obs:                       0.0000                                           
Max Obs:                       13.000   F-statistic (robust):             3.7876
                            

In [415]:
print(lmp.compare({
    "Pooled": pooled(df, "inbtou_volume"),
    "RE":     random_effects(df, "inbtou_volume"),
    "FE":     fixed_effects(df, "inbtou_volume"),
}))

                           Model Comparison                          
                               Pooled              RE              FE
---------------------------------------------------------------------
Dep. Variable           inbtou_volume   inbtou_volume   inbtou_volume
Estimator                   PooledOLS   RandomEffects        PanelOLS
No. Observations                  219             219             219
Cov. Est.                      Robust          Robust          Robust
R-squared                      0.0012          0.0002          0.0012
R-Squared (Within)            -0.0171       4.701e-05         -0.0171
R-Squared (Between)            0.0023          0.0001          0.0023
R-Squared (Overall)            0.0012       7.565e-06          0.0012
F-statistic                    0.1248          0.0269          0.1248
P-value (F-stat)               0.8827          0.9735          0.8827
const                         1.2e+08        1.23e+08         1.2e+08
                    

In [423]:
print(lmp.compare({
    "FE Robust":    fixed_effects(df, "inbtou_spending"),
    "FE Clustered": fixed_effects(df, "inbtou_spending", clustered=True),
}))

                        Model Comparison                       
                                  FE Robust        FE Clustered
---------------------------------------------------------------
Dep. Variable               inbtou_spending     inbtou_spending
Estimator                          PanelOLS            PanelOLS
No. Observations                        184                 184
Cov. Est.                            Robust           Clustered
R-squared                            0.0023              0.2029
R-Squared (Within)                   0.0589              0.2029
R-Squared (Between)                 -0.0025             -0.0257
R-Squared (Overall)                  0.0023             -0.0099
F-statistic                          0.2109              21.122
P-value (F-stat)                     0.8101              0.0000
const                             1.538e+09           1.566e+09
                                   (10.859)            (120.01)
covid19_cases                        463