In [4]:
# import usual suspects

%matplotlib inline
import pandas as pd
import numpy as np
from numpy import arange
import math

import seaborn as sns
sns.set_style("white")

import scipy as sp
from scipy import linalg, optimize

import statsmodels.api as sm
from statsmodels.sandbox.regression.predstd import wls_prediction_std
from statsmodels.tsa.ar_model import AR
from statsmodels.tsa.stattools import adfuller
from statsmodels.tsa.arima_model import ARMA

import collections
from collections import Counter

import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.path as mpath
from matplotlib.dates import DateFormatter

import calendar
import datetime as dt
fromtimestamp = dt.datetime.fromtimestamp
from datetime import datetime

from pandas.plotting import autocorrelation_plot
from pandas.plotting import lag_plot

In [5]:
# import cleaned weekly ufo data

ufo = pd.read_csv('../Data/Datasets/cleandate_weekly_reval.csv',index_col=0)
# copy into w(orking)ufo df
wufo = ufo.copy().rename(columns={'rel_val_alltime':'weekly_ufopct'})
# confirm
wufo.head(1)

Unnamed: 0,startdateweek,weekly_ufocount,weekly_ufopct,year,month,weeknum,week_id
0,2014-07-06,470,100.0,2014,7,27,2014-27


In [6]:
# add date index

# add date col and sort values
wufo['date'] = ufo['startdateweek']
wufo = wufo.sort_values('date')
# add constant
wufo['const'] = 1
# make date col datetime and set index to date
wufo.date = pd.to_datetime(wufo.date)
# dateindex
wufo.index = pd.DatetimeIndex(wufo.date, freq='W')
# confirm
wufo.columns

Index(['startdateweek', 'weekly_ufocount', 'weekly_ufopct', 'year', 'month',
       'weeknum', 'week_id', 'date', 'const'],
      dtype='object')

In [14]:
# add numeric bool col for corona timeframe

row_indexes = [x for x in wufo[wufo['startdateweek']>'2019-09-01'].index]
wufo['corona'] =1
wufo.loc[row_indexes,'corona']=0
wufo.head()

Unnamed: 0_level_0,startdateweek,weekly_ufocount,weekly_ufopct,year,month,weeknum,week_id,date,const,corona,daynum,floatdate
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2006-01-08,2006-01-08,3,0.638298,2006,1,1,2006-1,2006-01-08,1,1,8,2006.8
2006-01-15,2006-01-15,8,1.702128,2006,1,2,2006-2,2006-01-15,1,1,15,2006.15
2006-01-22,2006-01-22,6,1.276596,2006,1,3,2006-3,2006-01-22,1,1,22,2006.22
2006-01-29,2006-01-29,8,1.702128,2006,1,4,2006-4,2006-01-29,1,1,29,2006.29
2006-02-05,2006-02-05,2,0.425532,2006,2,5,2006-5,2006-02-05,1,1,36,2006.36


In [17]:
wufo.corr()

Unnamed: 0,weekly_ufocount,weekly_ufopct,year,month,weeknum,const,corona,daynum,floatdate
weekly_ufocount,1.0,1.0,0.180968,0.211361,0.216221,,-0.186271,0.206386,0.172386
weekly_ufopct,1.0,1.0,0.180968,0.211361,0.216221,,-0.186271,0.206386,0.172386
year,0.180968,0.180968,1.0,-0.044285,-0.040588,,-0.438536,-0.044693,0.998817
month,0.211361,0.211361,-0.044285,1.0,0.959685,,-0.010704,0.996454,-0.05595
weeknum,0.216221,0.216221,-0.040588,0.959685,1.0,,-0.008856,0.960526,-0.053104
const,,,,,,,,,
corona,-0.186271,-0.186271,-0.438536,-0.010704,-0.008856,,1.0,-0.010956,-0.43785
daynum,0.206386,0.206386,-0.044693,0.996454,0.960526,,-0.010956,1.0,-0.056578
floatdate,0.172386,0.172386,0.998817,-0.05595,-0.053104,,-0.43785,-0.056578,1.0


In [13]:
######################## ######################## ######################## ######################## ########################
######################### ######################## ######################## ######################## ########################
#
# review standard deviations
#
print('\n\nstandard deviations UFO Count per Week:',(" "*20), "Total Count:",(" "*20),"Std in % of Total: \n", (" "*30),wufo.weekly_ufocount.std()," "*20,wufo.weekly_ufocount.sum()," "*20, wufo.weekly_ufocount.std()/(wufo.weekly_ufocount.sum()/100))
print('\n\nstandard deviations UFO Percentage per Week:',"\n"," "*30,wufo.weekly_ufopct.std())



standard deviations UFO Count per Week:                      Total Count:                      Std in % of Total: 
                                49.28664324758158                      71621                      0.06881591048377093


standard deviations UFO Percentage per Week: 
                                10.48651983991097


In [9]:
######################## ######################## ######################## ######################## ########################
######################### ######################## ######################## ######################## ########################

# adding float date col

wufo['daynum'] = wufo['date'].dt.dayofyear
wufo['floatdate'] =  wufo['year'].astype(str) +"." + wufo['daynum'].astype(str)
wufo['floatdate'] = wufo.floatdate.astype(float)

In [10]:
# datefloat linear model

from scipy.odr import *
x = wufo.floatdate
y = wufo.weekly_ufocount

def linear_func(p,x):
    m,c = p
    return m*x +c

linear_model = Model(linear_func)

data = RealData(x,y)

odr= ODR(data,linear_model,beta0=[10.,1000.])

out = odr.run()

out.pprint()

Beta: [ 6.68197910e+01 -1.34426451e+05]
Beta Std Error: [1.37140479e+01 2.76087839e+04]
Beta Covariance: [[ 1.07298990e+01 -2.16011673e+04]
 [-2.16011673e+04  4.34869416e+07]]
Residual Variance: 17.52813415227055
Inverse Condition #: 1.7327340252283585e-05
Reason(s) for Halting:
  Sum of squares convergence
