In [1]:
%matplotlib inline
import pandas as pd
import numpy as np
from numpy import arange
import math

import seaborn as sns
sns.set_style("white")

import scipy as sp
from scipy import linalg, optimize

import statsmodels.api as sm
from statsmodels.sandbox.regression.predstd import wls_prediction_std

import collections
from collections import Counter

import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.path as mpath
from matplotlib.dates import DateFormatter

import calendar
import datetime as dt
fromtimestamp = dt.datetime.fromtimestamp


In [2]:
df = pd.read_csv('ufo.csv')

In [3]:
df.head(1)

Unnamed: 0,datetime,City,State,Shape,Duration,Summary,Posted,Unnamed: 7,Unnamed: 8,Unnamed: 9,Unnamed: 10
0,2006-01-01 01:20:00,I-80 (unknown city proximity),NY,Formation,0 days 00:00:20.000000000,"Traveling from Brooklyn, NY to Groton, NY on I...",6/7/19,,,,


In [4]:
wdf = df.copy()
wdf.columns

Index(['datetime', 'City', 'State', 'Shape', 'Duration', 'Summary', 'Posted',
       'Unnamed: 7', 'Unnamed: 8', 'Unnamed: 9', 'Unnamed: 10'],
      dtype='object')

In [5]:
wdf['datetype'] = pd.to_datetime(wdf['datetime']).dt.date
wdf.head()

Unnamed: 0,datetime,City,State,Shape,Duration,Summary,Posted,Unnamed: 7,Unnamed: 8,Unnamed: 9,Unnamed: 10,datetype
0,2006-01-01 01:20:00,I-80 (unknown city proximity),NY,Formation,0 days 00:00:20.000000000,"Traveling from Brooklyn, NY to Groton, NY on I...",6/7/19,,,,,2006-01-01
1,2006-01-01 23:00:00,San Antonio/Laredo (between),TX,Triangle,0 days 00:03:00.000000000,Three separate sightins which ocurred in rural...,6/9/09,,,,,2006-01-01
2,2006-01-02 00:00:00,Ann Arbor,MI,Changing,0 days 00:03:00.000000000,"Probable HOAX: 5 ufos, silent black objects, m...",10/30/06,,,,,2006-01-02
3,2006-01-02 00:00:00,Fredericksburg,VA,Light,0 days 00:02:00.000000000,clear sky looked like a star 10:00 am in the m...,2/1/07,,,,,2006-01-02
4,2006-01-04 07:45:00,Scotland,PA,Light,0 days 00:01:00.000000000,"Small, white, light in the northeast sky...dis...",2/1/07,,,,,2006-01-04


In [6]:
wdf['dttyp'] = pd.to_datetime(wdf['datetype'])

In [7]:
wdf['weeknum'] = wdf['dttyp'].dt.week

  wdf['weeknum'] = wdf['dttyp'].dt.week


In [8]:
wdf['weekofyear'] = wdf['dttyp'].dt.weekofyear

  wdf['weekofyear'] = wdf['dttyp'].dt.weekofyear


In [9]:
wdf['year'] = wdf['dttyp'].dt.year

In [10]:
wdf['year_week'] = wdf['year'].astype(str) + "-" + wdf['weekofyear'].astype(str)

In [11]:
wdf = wdf.drop(columns=['Unnamed: 7', 'Unnamed: 8', 'Unnamed: 9', 'Unnamed: 10', 'datetype'])

In [12]:
wdf = wdf.drop(columns=['datetime','weeknum'])

In [13]:
wdf.head()

Unnamed: 0,City,State,Shape,Duration,Summary,Posted,dttyp,weekofyear,year,year_week
0,I-80 (unknown city proximity),NY,Formation,0 days 00:00:20.000000000,"Traveling from Brooklyn, NY to Groton, NY on I...",6/7/19,2006-01-01,52,2006,2006-52
1,San Antonio/Laredo (between),TX,Triangle,0 days 00:03:00.000000000,Three separate sightins which ocurred in rural...,6/9/09,2006-01-01,52,2006,2006-52
2,Ann Arbor,MI,Changing,0 days 00:03:00.000000000,"Probable HOAX: 5 ufos, silent black objects, m...",10/30/06,2006-01-02,1,2006,2006-1
3,Fredericksburg,VA,Light,0 days 00:02:00.000000000,clear sky looked like a star 10:00 am in the m...,2/1/07,2006-01-02,1,2006,2006-1
4,Scotland,PA,Light,0 days 00:01:00.000000000,"Small, white, light in the northeast sky...dis...",2/1/07,2006-01-04,1,2006,2006-1


In [14]:
wdf['dttyp'].dtypes

dtype('<M8[ns]')

In [15]:
ufodata = pd.DataFrame(wdf['year_week'])

In [16]:
ufodata['date_of_sight'] = wdf['dttyp']

In [17]:
ufodata['year'] = wdf['year']
ufodata['month'] = wdf['dttyp'].dt.month
ufodata['week_of_year'] = wdf['weekofyear']
ufodata['state'] = wdf['State']
ufodata['city'] = wdf['City']
ufodata['sight_summary'] = wdf['Summary']


In [18]:
ufodata['ufo_shape'] = wdf['Shape']

In [19]:
ufodata = ufodata.reset_index()

In [20]:
ufodata.head()

Unnamed: 0,index,year_week,date_of_sight,year,month,week_of_year,state,city,sight_summary,ufo_shape
0,0,2006-52,2006-01-01,2006,1,52,NY,I-80 (unknown city proximity),"Traveling from Brooklyn, NY to Groton, NY on I...",Formation
1,1,2006-52,2006-01-01,2006,1,52,TX,San Antonio/Laredo (between),Three separate sightins which ocurred in rural...,Triangle
2,2,2006-1,2006-01-02,2006,1,1,MI,Ann Arbor,"Probable HOAX: 5 ufos, silent black objects, m...",Changing
3,3,2006-1,2006-01-02,2006,1,1,VA,Fredericksburg,clear sky looked like a star 10:00 am in the m...,Light
4,4,2006-1,2006-01-04,2006,1,1,PA,Scotland,"Small, white, light in the northeast sky...dis...",Light


In [21]:
#ufodata.to_csv('ufodata.csv')

In [23]:
ufodata.describe()

Unnamed: 0,index,year,month,week_of_year
count,71565.0,71565.0,71565.0,71565.0
mean,35782.0,2013.25989,6.841626,27.879857
std,20659.180344,3.930989,3.261901,14.250315
min,0.0,2006.0,1.0,1.0
25%,17891.0,2010.0,4.0,16.0
50%,35782.0,2013.0,7.0,28.0
75%,53673.0,2016.0,10.0,39.0
max,71564.0,2020.0,12.0,53.0


In [24]:
len(ufodata)

71565