In [1]:
import pandas as pd
from statsmodels.tsa.ardl import ardl_select_order
from statsmodels.tsa.ardl import ARDL

import numpy as np

import sys
# Put the project's model directory on the import path so that the
# `ARDL_model` import below can be resolved (presumably ARDL_model.py lives
# in that folder — confirm). The entry is relative, so it only works when the
# kernel's working directory is the folder this notebook is stored in.
sys.path.append("../src/model")

from pprint import pprint

# Project-local helpers that fit the ARDL models used in the cells below.
from ARDL_model import ARDL_model_func, ARDL_states_separate, ARDL_model_func_jade

# Root folder of the cleaned data sets; the read_csv calls below build their
# paths by appending to it.
DATA_PATH = "../data/CLEAN"

# ARDL function - ARDL models fitted separately for each state (except Pennsylvania), each over that state's own run of consecutive years
# Violence counts per state - "normal" box office revenue

### returns a dictionary of fitted ARDL models

In [2]:
# Folder handed to ARDL_states_separate as its first argument.
DIRECTORY_PATH = DATA_PATH + "/FBI_91_12/01_States_Separately"

In [2]:
# Tab-separated box-office table; passed to the ARDL helper functions below.
df_box_offices = pd.read_csv(
    DATA_PATH + "/Violent_Movies_final.tsv",
    sep="\t",
)

In [4]:
# Tab-separated table of consecutive years per state; passed to
# ARDL_states_separate below.
consecutive_years_per_state = pd.read_csv(
    DATA_PATH + "/FBI_91_12/02_Consecutive_Years_States/Consecutive_years_states.tsv",
    sep="\t",
)

In [5]:
# Fit the per-state models. The result is indexed by string keys such as
# "ARDL_Connecticut" in the next cell.
fitted_ARDL_models = ARDL_states_separate(
    DIRECTORY_PATH,
    df_box_offices,
    consecutive_years_per_state,
    ARDL_model_func,
    time_fixed_effects=False,
)

In [15]:
# Pick the entry stored under the key "ARDL_Connecticut" for inspection.
test = fitted_ARDL_models["ARDL_Connecticut"]

In [16]:
# Show the summary of the Connecticut entry selected above.
test.summary()

0,1,2,3
Dep. Variable:,Violence_score,No. Observations:,776.0
Model:,"ARDL(4, 4)",Log Likelihood,-4473.741
Method:,Conditional MLE,S.D. of innovations,79.524
Date:,"Sat, 14 Dec 2024",AIC,8969.482
Time:,17:24:01,BIC,9020.621
Sample:,4,HQIC,8989.16
,776,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
const,19.5826,7.545,2.596,0.010,4.772,34.394
Violence_score.L1,0.5525,0.036,15.540,0.000,0.483,0.622
Violence_score.L2,0.1811,0.041,4.467,0.000,0.101,0.261
Violence_score.L3,0.0714,0.041,1.760,0.079,-0.008,0.151
Violence_score.L4,0.1795,0.035,5.074,0.000,0.110,0.249
Box office revenue.L0,1.419e-08,1.08e-08,1.320,0.187,-6.91e-09,3.53e-08
Box office revenue.L1,-6.134e-09,1.08e-08,-0.569,0.570,-2.73e-08,1.5e-08
Box office revenue.L2,-2.384e-08,1.08e-08,-2.210,0.027,-4.5e-08,-2.66e-09
Box office revenue.L3,-2.408e-08,1.08e-08,-2.223,0.027,-4.53e-08,-2.81e-09


# ARDL function - violence offense counts per week for all states together - "normal" box office revenue

### We count the crimes in all states and add them up per week
### Only the time span 2008 - 2012 is counted, since this is the period for which we have roughly consecutive data for all states (except Pennsylvania)

In [None]:
# Box-office table (same file as loaded at the top of the notebook).
df_box_offices = pd.read_csv(
    DATA_PATH + "/Violent_Movies_final.tsv",
    sep="\t",
)

# Violence scores merged over all states (comma-separated).
df_real_violence = pd.read_csv(
    DATA_PATH + "/FBI_91_12/00_All_States_Merged/violence_scores_merged.csv",
    sep=",",
)

In [13]:
# Keep only the rows whose Year is 2008 or later.
df_real_violence_cut = df_real_violence.loc[df_real_violence["Year"].ge(2008)]

In [18]:
# Fit on the box-office table and the violence rows from 2008 onwards.
test_model = ARDL_model_func(
    df_box_offices,
    df_real_violence_cut,
    time_fixed_effects=False,
)

In [19]:
# Show the summary of the model fitted on the 2008-onwards rows.
test_model.summary()

0,1,2,3
Dep. Variable:,Violence_score,No. Observations:,252.0
Model:,"ARDL(4, 4)",Log Likelihood,-2360.512
Method:,Conditional MLE,S.D. of innovations,3292.018
Date:,"Mon, 16 Dec 2024",AIC,4743.024
Time:,15:27:30,BIC,4781.672
Sample:,4,HQIC,4758.583
,252,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
const,5655.7914,2055.854,2.751,0.006,1605.796,9705.786
Violence_score.L1,0.6511,0.064,10.182,0.000,0.525,0.777
Violence_score.L2,0.0879,0.075,1.178,0.240,-0.059,0.235
Violence_score.L3,0.0398,0.074,0.541,0.589,-0.105,0.185
Violence_score.L4,0.1253,0.061,2.068,0.040,0.006,0.245
Box office revenue.L0,-1.09e-07,6.25e-07,-0.174,0.862,-1.34e-06,1.12e-06
Box office revenue.L1,-4.678e-09,6.25e-07,-0.007,0.994,-1.24e-06,1.23e-06
Box office revenue.L2,2.466e-07,6.26e-07,0.394,0.694,-9.87e-07,1.48e-06
Box office revenue.L3,-1.942e-06,6.26e-07,-3.100,0.002,-3.18e-06,-7.08e-07


# ARDL function - violence offense ratios per state - "normal" box office revenue

### We add up the violent crimes per state per week and divide by the total number of crimes in this state in this year
### Applied for Alabama state in the time span 2008 - 2012 (since we have consecutive data here)

In [20]:
# Alabama violence-ratio file for 2008-2012 (comma-separated).
df_real_violence_ratio = pd.read_csv(
    DATA_PATH + "/FBI_91_12/03_Violence_Ratio_Alabama/Violence_ratio_alabama_2008_2012.csv",
    sep=",",
)

In [21]:
# Fit on the box-office table and the Alabama violence-ratio table.
test_model_2 = ARDL_model_func(
    df_box_offices,
    df_real_violence_ratio,
    time_fixed_effects=False,
)

In [23]:
# Show the summary of the violence-ratio model.
test_model_2.summary()

0,1,2,3
Dep. Variable:,Violence_score,No. Observations:,250.0
Model:,"ARDL(4, 0, 2)",Log Likelihood,1066.999
Method:,Conditional MLE,S.D. of innovations,0.003
Date:,"Mon, 16 Dec 2024",AIC,-2113.998
Time:,15:33:25,BIC,-2078.945
Sample:,4,HQIC,-2099.884
,250,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
const,0.0122,0.002,6.360,0.000,0.008,0.016
Violence_score.L1,0.1339,0.062,2.162,0.032,0.012,0.256
Violence_score.L2,0.0821,0.063,1.313,0.190,-0.041,0.205
Violence_score.L3,-0.0144,0.063,-0.229,0.819,-0.138,0.109
Violence_score.L4,0.1654,0.062,2.653,0.009,0.043,0.288
no. films released.L0,-0.0004,0.000,-3.337,0.001,-0.001,-0.000
Box office revenue.L0,1.077e-12,6.16e-13,1.748,0.082,-1.37e-13,2.29e-12
Box office revenue.L1,8.254e-13,6.08e-13,1.357,0.176,-3.73e-13,2.02e-12
Box office revenue.L2,1.186e-12,6.1e-13,1.945,0.053,-1.55e-14,2.39e-12


# ARDL function - violence counts per state - normalized box office revenue from Jade

### We count all violent crimes in one state, add them up weekly
### Normalizing box office revenue as proposed by Jade
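### Applied for Alabama state in the time span 2008 - 2012 (since we have consecutive data here)
### NOTE: the cells below load `00_All_States_Merged/violence_scores_merged.csv` without a year filter, and the summary reports 1152 observations, so this description may not match the model that was actually fitted - to be confirmed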

In [3]:
# Movie violence metric table (comma-separated); passed to ARDL_model_func_jade below.
score_jade = pd.read_csv(
    DATA_PATH + "/movie_violence_metric.csv",
    sep=",",
)

In [4]:
# Preview the first rows of the metric table.
score_jade.head()

Unnamed: 0,Year,Week,Metric
0,1901,8,1.0
1,1903,49,1.0
2,1908,10,1.0
3,1908,29,1.0
4,1908,30,1.0


In [5]:
# Load the merged all-states violence scores. No year filter is applied in
# this cell.
df_real_violence = pd.read_csv(
    DATA_PATH + "/FBI_91_12/00_All_States_Merged/violence_scores_merged.csv",
    sep=",",
)

In [6]:
# Fit with the movie violence metric and the unfiltered merged violence table.
# This rebinds `test_model`, which an earlier cell used for the 2008-onwards
# count model.
test_model = ARDL_model_func_jade(
    score_jade,
    df_real_violence,
    time_fixed_effects=False,
)

In [7]:
# Show the summary of the model fitted with the movie violence metric.
test_model.summary()

0,1,2,3
Dep. Variable:,Violence_score,No. Observations:,1152.0
Model:,"ARDL(9, 1)",Log Likelihood,-10602.728
Method:,Conditional MLE,S.D. of innovations,2584.505
Date:,"Mon, 16 Dec 2024",AIC,21231.457
Time:,15:45:02,BIC,21296.995
Sample:,9,HQIC,21256.204
,1152,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
const,275.9903,163.282,1.690,0.091,-44.380,596.361
Violence_score.L1,0.5738,0.030,19.352,0.000,0.516,0.632
Violence_score.L2,0.1293,0.034,3.775,0.000,0.062,0.196
Violence_score.L3,0.1041,0.034,3.023,0.003,0.037,0.172
Violence_score.L4,0.1648,0.035,4.773,0.000,0.097,0.233
Violence_score.L5,0.0381,0.035,1.091,0.276,-0.030,0.107
Violence_score.L6,0.0588,0.035,1.696,0.090,-0.009,0.127
Violence_score.L7,-0.0021,0.035,-0.061,0.951,-0.070,0.066
Violence_score.L8,0.0110,0.034,0.320,0.749,-0.056,0.078


# ARDL function - violence offense z score - "normal" box office revenue

### We calculate the z-score (by Jen) for one state, ending up with one z-score per week
### Applied for Alabama state in the time span 2008 - 2012 (since we have consecutive data here)

In [8]:
# Alabama violence z-score file for 2008-2012 (comma-separated).
df_real_violence_z = pd.read_csv(
    DATA_PATH + "/FBI_91_12/04_Violence_z_Alabama/Violence_z_alabama_2008_2012.csv",
    sep=",",
)

In [9]:
# Fit on the box-office table and the Alabama z-score table.
test_model_3 = ARDL_model_func(
    df_box_offices,
    df_real_violence_z,
    time_fixed_effects=False,
)

In [10]:
# Show the summary of the z-score model.
test_model_3.summary()

0,1,2,3
Dep. Variable:,Violence_score,No. Observations:,250.0
Model:,"ARDL(2, 0)",Log Likelihood,-296.644
Method:,Conditional MLE,S.D. of innovations,0.8
Date:,"Mon, 16 Dec 2024",AIC,603.288
Time:,16:27:38,BIC,620.855
Sample:,2,HQIC,610.36
,250,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
const,0.1121,0.080,1.406,0.161,-0.045,0.269
Violence_score.L1,-0.3409,0.063,-5.450,0.000,-0.464,-0.218
Violence_score.L2,-0.1819,0.062,-2.916,0.004,-0.305,-0.059
no. films released.L0,-0.0670,0.030,-2.201,0.029,-0.127,-0.007


# ARDL function - violence z-score (by Jen) - normalized box office revenue from Jade

### We compute the z-score for each state separately, then add them up by week and year
### Normalizing box office revenue as proposed by Jade
### Applied for all states (except Pennsylvania) in the time frame 2008 - 2012

In [None]:
# Raw FBI data folder handed to z_score_all_states_merged below. Unlike the
# other paths in this notebook it is a literal and is not built from DATA_PATH.
DIRECTORY_PATH_2 = "../data/RAW/FBI_91_12"

In [None]:
# NOTE(review): `z_score_all_states_merged` is not among the names imported
# from ARDL_model in the import cell and is not defined in any cell visible
# here, so this call would raise NameError on a fresh kernel unless the name is
# provided elsewhere — confirm where it lives and add the missing import.
# This cell has no execution count, i.e. it appears not to have been run.
z_scores_merged = z_score_all_states_merged(DIRECTORY_PATH_2, df_box_offices, consecutive_years_per_state, ARDL_model_func, time_fixed_effects=False)