In [7]:
# import usual suspects

%matplotlib inline
import pandas as pd
import numpy as np
from numpy import arange
import math

import seaborn as sns
sns.set_style("white")

import scipy as sp
from scipy import linalg, optimize

import statsmodels.api as sm
from statsmodels.sandbox.regression.predstd import wls_prediction_std
from statsmodels.tsa.ar_model import AR
from statsmodels.tsa.stattools import adfuller
from statsmodels.tsa.arima_model import ARMA

import collections
from collections import Counter

import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.path as mpath
from matplotlib.dates import DateFormatter

import calendar
import datetime as dt
fromtimestamp = dt.datetime.fromtimestamp
from datetime import datetime

from pandas.plotting import autocorrelation_plot
from pandas.plotting import lag_plot

In [8]:
# import cleaned weekly ufo data

ufo = pd.read_csv('../Data/Datasets/cleandate_weekly_reval.csv',index_col=0)
# copy into w(orking)ufo df
wufo = ufo.copy().rename(columns={'rel_val_alltime':'weekly_ufopct'})
# confirm
wufo.head(1)

Unnamed: 0,startdateweek,weekly_ufocount,weekly_ufopct,year,month,weeknum,week_id
0,2014-07-06,470,100.0,2014,7,27,2014-27


In [9]:
# add date index

# add date col and sort values
wufo['date'] = ufo['startdateweek']
wufo = wufo.sort_values('date')
# add constant
wufo['const'] = 1
# make date col datetime and set index to date
wufo.date = pd.to_datetime(wufo.date)
# dateindex
wufo.index = pd.DatetimeIndex(wufo.date, freq='W')
# confirm
wufo.columns

Index(['startdateweek', 'weekly_ufocount', 'weekly_ufopct', 'year', 'month',
       'weeknum', 'week_id', 'date', 'const'],
      dtype='object')

In [10]:
# add numeric bool col for corona timeframe

row_indexes = [x for x in wufo[wufo['startdateweek']>'2019-09-01'].index]
wufo['corona'] =1
wufo.loc[row_indexes,'corona']=0
wufo.head()

Unnamed: 0_level_0,startdateweek,weekly_ufocount,weekly_ufopct,year,month,weeknum,week_id,date,const,corona
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2006-01-08,2006-01-08,3,0.638298,2006,1,1,2006-1,2006-01-08,1,1
2006-01-15,2006-01-15,8,1.702128,2006,1,2,2006-2,2006-01-15,1,1
2006-01-22,2006-01-22,6,1.276596,2006,1,3,2006-3,2006-01-22,1,1
2006-01-29,2006-01-29,8,1.702128,2006,1,4,2006-4,2006-01-29,1,1
2006-02-05,2006-02-05,2,0.425532,2006,2,5,2006-5,2006-02-05,1,1


In [11]:
# review general correlations

wufo.corr()

Unnamed: 0,weekly_ufocount,weekly_ufopct,year,month,weeknum,const,corona
weekly_ufocount,1.0,1.0,0.180968,0.211361,0.216221,,-0.186271
weekly_ufopct,1.0,1.0,0.180968,0.211361,0.216221,,-0.186271
year,0.180968,0.180968,1.0,-0.044285,-0.040588,,-0.438536
month,0.211361,0.211361,-0.044285,1.0,0.959685,,-0.010704
weeknum,0.216221,0.216221,-0.040588,0.959685,1.0,,-0.008856
const,,,,,,,
corona,-0.186271,-0.186271,-0.438536,-0.010704,-0.008856,,1.0


In [12]:
######################## ######################## ######################## ######################## ########################
######################### ######################## ######################## ######################## ########################
#
# review standard deviations
#
print('\n\nstandard deviations UFO Count per Week:',(" "*20), "Total Count:",(" "*20),"Std in % of Total: \n", (" "*30),wufo.weekly_ufocount.std()," "*20,wufo.weekly_ufocount.sum()," "*20, wufo.weekly_ufocount.std()/(wufo.weekly_ufocount.sum()/100))
print('\n\nstandard deviations UFO Percentage per Week:',"\n"," "*30,wufo.weekly_ufopct.std())



standard deviations UFO Count per Week:                      Total Count:                      Std in % of Total: 
                                49.28664324758158                      71621                      0.06881591048377093


standard deviations UFO Percentage per Week: 
                                10.48651983991097


In [13]:
######################## ######################## ######################## ######################## ########################
######################### ######################## ######################## ######################## ########################

# adding daynum col

wufo['daynum'] = wufo['date'].dt.dayofyear

In [26]:
# OLS: all time date related vals ⇔ ufos

results = sm.OLS(wufo["weekly_ufocount"], 
                  wufo[['const','daynum','month', 'year','corona']]).fit()
    # summary
results.summary()

0,1,2,3
Dep. Variable:,weekly_ufocount,R-squared:,0.096
Model:,OLS,Adj. R-squared:,0.091
Method:,Least Squares,F-statistic:,20.18
Date:,"Fri, 18 Sep 2020",Prob (F-statistic):,8.18e-16
Time:,09:41:11,Log-Likelihood:,-4033.4
No. Observations:,766,AIC:,8077.0
Df Residuals:,761,BIC:,8100.0
Df Model:,4,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-3076.2764,901.162,-3.414,0.001,-4845.335,-1307.218
daynum,-0.2780,0.193,-1.443,0.149,-0.656,0.100
month,11.5737,5.889,1.965,0.050,0.013,23.135
year,1.5738,0.446,3.528,0.000,0.698,2.450
corona,-24.2026,7.448,-3.250,0.001,-38.824,-9.582

0,1,2,3
Omnibus:,458.048,Durbin-Watson:,0.94
Prob(Omnibus):,0.0,Jarque-Bera (JB):,5699.759
Skew:,2.477,Prob(JB):,0.0
Kurtosis:,15.411,Cond. No.,1070000.0


In [None]:
##################################################################################
##################################################################################
### NOTES:
#
#       highest coef: month (11.5737)
#                 
##################################################################################
##################################################################################


In [28]:
# OLS: only month vals ⇔ ufos

results = sm.OLS(wufo["weekly_ufocount"], 
                  wufo[['const','month']]).fit()
    # summary
results.summary()

0,1,2,3
Dep. Variable:,weekly_ufocount,R-squared:,0.045
Model:,OLS,Adj. R-squared:,0.043
Method:,Least Squares,F-statistic:,35.73
Date:,"Fri, 18 Sep 2020",Prob (F-statistic):,3.48e-09
Time:,09:45:39,Log-Likelihood:,-4054.5
No. Observations:,766,AIC:,8113.0
Df Residuals:,764,BIC:,8122.0
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,73.9001,3.713,19.903,0.000,66.611,81.189
month,3.0386,0.508,5.977,0.000,2.041,4.036

0,1,2,3
Omnibus:,460.403,Durbin-Watson:,0.897
Prob(Omnibus):,0.0,Jarque-Bera (JB):,5636.066
Skew:,2.502,Prob(JB):,0.0
Kurtosis:,15.31,Cond. No.,15.8


In [None]:
##################################################################################
##################################################################################
### NOTES:
#
#              Adjusted R-squared month: 0.043
#
##################################################################################
##################################################################################

In [27]:
# OLS: only year vals ⇔ ufos

results = sm.OLS(wufo["weekly_ufocount"], 
                  wufo[['const','year']]).fit()
    # summary
results.summary()

0,1,2,3
Dep. Variable:,weekly_ufocount,R-squared:,0.033
Model:,OLS,Adj. R-squared:,0.031
Method:,Least Squares,F-statistic:,25.87
Date:,"Fri, 18 Sep 2020",Prob (F-statistic):,4.6e-07
Time:,09:42:38,Log-Likelihood:,-4059.3
No. Observations:,766,AIC:,8123.0
Df Residuals:,764,BIC:,8132.0
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-4136.9742,831.786,-4.974,0.000,-5769.832,-2504.116
year,2.1017,0.413,5.086,0.000,1.291,2.913

0,1,2,3
Omnibus:,439.286,Durbin-Watson:,0.866
Prob(Omnibus):,0.0,Jarque-Bera (JB):,4936.513
Skew:,2.379,Prob(JB):,0.0
Kurtosis:,14.49,Cond. No.,955000.0


In [None]:
##################################################################################
##################################################################################
### NOTES:
#
#       Adjusted R-squared year: 0.031
#             
##################################################################################
##################################################################################


In [62]:
##################################################################################
#
# OLS: monthly mean corona/before ⇔ ufos

# import dataset monthly means
tsdf = pd.read_csv('../Data/Datasets/monmeans_corona.csv',index_col=0)

# add constant and numeric bool corona
tsdf['const'] = 1
tsdf['corona'] = 1
tsdf = tsdf.loc[tsdf['Timeframe:'] =='UFOs before corona (monthly mean)'].assign(corona=0)

In [67]:
# OLS: month, corona, before ⇔ ufos

results = sm.OLS(tsdf["UFOs"], 
                  tsdf[['const','Month','corona']]).fit()
    # summary
results.summary()

##################################################################################
##################################################################################

0,1,2,3
Dep. Variable:,UFOs,R-squared:,0.567
Model:,OLS,Adj. R-squared:,0.495
Method:,Least Squares,F-statistic:,7.859
Date:,"Fri, 18 Sep 2020",Prob (F-statistic):,0.031
Time:,10:18:38,Log-Likelihood:,-20.548
No. Observations:,8,AIC:,45.1
Df Residuals:,6,BIC:,45.26
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,11.4292,2.840,4.024,0.007,4.479,18.379
Month,1.5769,0.562,2.803,0.031,0.201,2.953
corona,0,0,,,0,0

0,1,2,3
Omnibus:,2.572,Durbin-Watson:,1.735
Prob(Omnibus):,0.276,Jarque-Bera (JB):,1.294
Skew:,0.944,Prob(JB):,0.524
Kurtosis:,2.439,Cond. No.,inf
