In [80]:
%matplotlib inline
import pandas as pd
import numpy as np
from numpy import arange
import math

import seaborn as sns
sns.set_style("white")

import scipy as sp
from scipy import linalg, optimize

import statsmodels.api as sm
from statsmodels.sandbox.regression.predstd import wls_prediction_std

import collections
from collections import Counter

import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.path as mpath
from matplotlib.dates import DateFormatter

import calendar
import datetime as dt
fromtimestamp = dt.datetime.fromtimestamp


In [89]:
# Load the cleaned UFO sightings, rename the key columns, set ufo_id and sort.
# NOTE(review): ufo_id is overwritten with the constant 1 on every row --
# presumably a per-sighting counter rather than a unique id; confirm.
# NOTE(review): the preview below shows '2006-10' right after '2006-1', so
# week_id appears to sort as a string, not chronologically.
ufo = (
    pd.read_csv('../data/ufo_clean.csv', index_col=0)
    .rename(columns={'index': 'ufo_id', 'year_week': 'week_id'})
    .assign(ufo_id=1)
    .sort_values('week_id')
)
# confirm
ufo.head()

Unnamed: 0,ufo_id,week_id,date_of_sight,year,month,week_of_year,state,city,sight_summary,ufo_shape
2,1,2006-1,2006-01-02,2006,1,1,MI,Ann Arbor,"Probable HOAX: 5 ufos, silent black objects, m...",Changing
3,1,2006-1,2006-01-02,2006,1,1,VA,Fredericksburg,clear sky looked like a star 10:00 am in the m...,Light
4,1,2006-1,2006-01-04,2006,1,1,PA,Scotland,"Small, white, light in the northeast sky...dis...",Light
53,1,2006-10,2006-03-06,2006,3,10,CA,North Hills,"((HOAX)) As I sat in my car taking my break, I...",Egg
54,1,2006-10,2006-03-07,2006,3,10,CA,Desert Hot Springs,Twin OrangeTriangles flying at extrememly high...,Chevron


In [82]:
# Count sightings per week_id and store them in the "ufosperweek" df
# (columns: week_id, weekly_ufocount; value_counts() orders rows by count, descending).
# Both columns are named explicitly instead of renaming reset_index() defaults:
# pandas >= 2.0 labels the value_counts() result 'count' and its index
# 'week_id', so a {'index': ..., 'week_id': ...} rename would put the week ids
# into 'weekly_ufocount' and leave the counts in 'count'.
ufosperweek = (
    ufo['week_id']
    .value_counts()
    .rename_axis('week_id')
    .reset_index(name='weekly_ufocount')
)
# review
ufosperweek

Unnamed: 0,week_id,weekly_ufocount
0,2014-27,470
1,2015-45,406
2,2020-16,403
3,2013-27,399
4,2012-27,339
...,...,...
763,2006-9,5
764,2006-12,5
765,2006-1,3
766,2006-8,2


In [83]:
# check the column dtypes of ufosperweek
ufosperweek.dtypes

week_id            object
weekly_ufocount     int64
dtype: object

In [84]:
# highest number of sightings counted in a single week, saved to ufoweekmax
ufoweekmax = ufosperweek['weekly_ufocount'].max()

In [85]:
# one percent of the weekly maximum, i.e. ufoweekmax corresponds to 100 pct
onepc_ufoweek = ufoweekmax/100

In [86]:
# add a column that expresses each weekly count as a percentage of the
# busiest week (the week with ufoweekmax sightings gets 100.0)
ufosperweek['weekly_pct_entrtm'] = ufosperweek['weekly_ufocount'].div(onepc_ufoweek)
# review
ufosperweek.head()

Unnamed: 0,week_id,weekly_ufocount,weekly_pct_entrtm
0,2014-27,470,100.0
1,2015-45,406,86.382979
2,2020-16,403,85.744681
3,2013-27,399,84.893617
4,2012-27,339,72.12766


In [87]:
# Count sightings per year and save them to the "ufosperyear" df
# (columns: year_id, yearly_ufocount; value_counts() orders rows by count, descending).
# Both columns are named explicitly instead of renaming reset_index() defaults:
# pandas >= 2.0 labels the value_counts() result 'count' and its index 'year',
# so a {'index': ..., 'year': ...} rename would silently put the years into
# 'yearly_ufocount' and every later max/percentage would use year numbers.
ufosperyear = (
    ufo['year']
    .value_counts()
    .rename_axis('year_id')
    .reset_index(name='yearly_ufocount')
)
# review
ufosperyear.head()

Unnamed: 0,year_id,yearly_ufocount
0,2014,7431
1,2012,6867
2,2013,6602
3,2015,5880
4,2016,4848


In [88]:
# highest number of sightings counted in a single year, saved to ufoyearmax
ufoyearmax = ufosperyear['yearly_ufocount'].max()

# one percent of the yearly maximum, i.e. ufoyearmax corresponds to 100 pct
onepc_ufoyear = ufoyearmax / 100

# add a column that expresses each yearly count as a percentage of the
# busiest year (the year with ufoyearmax sightings gets 100.0)
ufosperyear['yearly_pct_entrtm'] = ufosperyear['yearly_ufocount'].div(onepc_ufoyear)

# review
ufosperyear.head()

Unnamed: 0,year_id,yearly_ufocount,yearly_pct_entrtm
0,2014,7431,100.0
1,2012,6867,92.410174
2,2013,6602,88.844032
3,2015,5880,79.127977
4,2016,4848,65.24021


In [91]:
# export: uncomment the lines below to write the CSVs, then comment them out again
#ufosperyear.to_csv('../Data/Datasets/yearly_ufopct_entrtm.csv')
#ufosperweek.to_csv('../Data/Datasets/weekly_ufopct_entrtm.csv')