In [107]:
%matplotlib inline 
import matplotlib.pyplot as plt 
import pandas as pd
import numpy as np

import statsmodels.api as sm
from statsmodels.sandbox.regression.predstd import wls_prediction_std

import plotly.plotly as py
import plotly.graph_objs as go
py.sign_in('erikrood','3eqsrype8v')

#source - https://github.com/voxmedia/data-projects/blob/master/verge-drones-over-america/readme.md

#working trix - likely will need to do a weighted point system to separate by true category
#also, could do map to see where in the US (state-wise) the drone permits are coming in

In [21]:
DroneData = pandas.read_csv('/Users/erikrood/desktop/ipython_datasets/drone-data-full 2.csv') 

In [25]:
DroneData.head(2000)

Unnamed: 0,date,company,category,various_applications,aircraft,location
0,9/25/2014,Astraeus Aerial,Photo/Film,False,Astraeus Aerial Cinema System V.3CS UAS,CA
1,9/25/2014,Aerial MOB,Photo/Film,False,"HexaCrafter HC-1100, Aeronavics SkyJib 8 Heavy...",CA
2,9/25/2014,Pictorvision,Photo/Film,False,"PV- 14817, PVHL1, PV- 14817 PV-HL2, DJI Phanto...",CA
3,9/25/2014,"RC Pro Productions Consulting, dba Vortex Aerial",Photo/Film,False,"Coaxial Quad Multirotor VAO1, Coaxial Quad Mul...",CA
4,9/25/2014,Snaproll Media,Photo/Film,False,"Snaproll Media SUAS, Freefly Cinestar 6, Freef...",TN
5,9/25/2014,HeliVideo Productions,Photo/Film,False,"HVP- 14301 MultiRotor, Rotorcraft Model ERX12",TX
6,10/10/2014,Flying Cam,"Manufacturer, Photo/Film",False,Flying-Cam 3.0 SARAH,CA
7,12/10/2014,"Trimble Navigation, Ltd","Manufacturer, Agriculture",False,"Trimble UX5, Trimble UX5 hp, Trimble ZX5, Eagl...",CA
8,12/10/2014,Clayco,Construction,False,Skycatch Unmanned Aircraft System,IL
9,12/10/2014,Woolpert (I),"Manufacturer, Agriculture",False,Woolpert Altavian Nova Block III (Nova Block III),OH


In [23]:
#n-count by category, date, location (map)
len(DroneData)

2733

In [32]:
DroneData.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2733 entries, 0 to 2732
Data columns (total 7 columns):
date                    2733 non-null object
company                 2733 non-null object
category                2733 non-null object
various_applications    2733 non-null bool
aircraft                2733 non-null object
location                2712 non-null object
count                   2733 non-null int64
dtypes: bool(1), int64(1), object(5)
memory usage: 130.8+ KB


In [177]:
#converting category field to string
DroneData[['category']] = DroneData[['category']].astype(str)
DroneData[['company']] = DroneData[['company']].astype(str)
DroneData[['count']] = DroneData[['count']].astype(float)

In [28]:
#defining function to count the # of commas in the category field
#ultimately used to weight the occurance since some registrations list multiple categories
def count_fx(s):
    return s.count(',')
DroneData['count'] = DroneData['category'].apply(count_fx)


In [30]:
DroneData.head(10)

Unnamed: 0,date,company,category,various_applications,aircraft,location,count
0,9/25/2014,Astraeus Aerial,Photo/Film,False,Astraeus Aerial Cinema System V.3CS UAS,CA,0
1,9/25/2014,Aerial MOB,Photo/Film,False,"HexaCrafter HC-1100, Aeronavics SkyJib 8 Heavy...",CA,0
2,9/25/2014,Pictorvision,Photo/Film,False,"PV- 14817, PVHL1, PV- 14817 PV-HL2, DJI Phanto...",CA,0
3,9/25/2014,"RC Pro Productions Consulting, dba Vortex Aerial",Photo/Film,False,"Coaxial Quad Multirotor VAO1, Coaxial Quad Mul...",CA,0
4,9/25/2014,Snaproll Media,Photo/Film,False,"Snaproll Media SUAS, Freefly Cinestar 6, Freef...",TN,0
5,9/25/2014,HeliVideo Productions,Photo/Film,False,"HVP- 14301 MultiRotor, Rotorcraft Model ERX12",TX,0
6,10/10/2014,Flying Cam,"Manufacturer, Photo/Film",False,Flying-Cam 3.0 SARAH,CA,1
7,12/10/2014,"Trimble Navigation, Ltd","Manufacturer, Agriculture",False,"Trimble UX5, Trimble UX5 hp, Trimble ZX5, Eagl...",CA,1
8,12/10/2014,Clayco,Construction,False,Skycatch Unmanned Aircraft System,IL,0
9,12/10/2014,Woolpert (I),"Manufacturer, Agriculture",False,Woolpert Altavian Nova Block III (Nova Block III),OH,1


In [44]:
DroneData[['count']] = DroneData[['count']].astype(int)

In [48]:
#creating the weighted score variable, defined as 1/# of categories included in registration
DroneData['weighted_score'] = np.where((DroneData['count']>0),1/(DroneData["count"]+1),0)

In [55]:
DroneData.head(10)

Unnamed: 0,date,company,category,various_applications,aircraft,location,count,weighted_score
0,9/25/2014,Astraeus Aerial,Photo/Film,False,Astraeus Aerial Cinema System V.3CS UAS,CA,0,0.0
1,9/25/2014,Aerial MOB,Photo/Film,False,"HexaCrafter HC-1100, Aeronavics SkyJib 8 Heavy...",CA,0,0.0
2,9/25/2014,Pictorvision,Photo/Film,False,"PV- 14817, PVHL1, PV- 14817 PV-HL2, DJI Phanto...",CA,0,0.0
3,9/25/2014,"RC Pro Productions Consulting, dba Vortex Aerial",Photo/Film,False,"Coaxial Quad Multirotor VAO1, Coaxial Quad Mul...",CA,0,0.0
4,9/25/2014,Snaproll Media,Photo/Film,False,"Snaproll Media SUAS, Freefly Cinestar 6, Freef...",TN,0,0.0
5,9/25/2014,HeliVideo Productions,Photo/Film,False,"HVP- 14301 MultiRotor, Rotorcraft Model ERX12",TX,0,0.0
6,10/10/2014,Flying Cam,"Manufacturer, Photo/Film",False,Flying-Cam 3.0 SARAH,CA,1,0.5
7,12/10/2014,"Trimble Navigation, Ltd","Manufacturer, Agriculture",False,"Trimble UX5, Trimble UX5 hp, Trimble ZX5, Eagl...",CA,1,0.5
8,12/10/2014,Clayco,Construction,False,Skycatch Unmanned Aircraft System,IL,0,0.0
9,12/10/2014,Woolpert (I),"Manufacturer, Agriculture",False,Woolpert Altavian Nova Block III (Nova Block III),OH,1,0.5


### Registration by state

In [71]:
df_map = DroneData[[1,5]]
#df = df_map.groupby('location')
#df.head(5)
df_map.head()

Unnamed: 0,company,location
0,Astraeus Aerial,CA
1,Aerial MOB,CA
2,Pictorvision,CA
3,"RC Pro Productions Consulting, dba Vortex Aerial",CA
4,Snaproll Media,TN


In [76]:
df=df_map.groupby('location').count().sort_values(by='company',ascending=False).reset_index()
df.head(100)

Unnamed: 0,location,company
0,FL,306
1,CA,305
2,TX,234
3,CO,89
4,IL,89
5,VA,84
6,OH,81
7,NC,80
8,PA,79
9,AZ,78


In [84]:
df.drop([54,55,56,57,58,59,60,61,62,63,64,65,66],inplace=True)
df.head(100)

Unnamed: 0,location,company
0,FL,306
1,CA,305
2,TX,234
3,CO,89
4,IL,89
5,VA,84
6,OH,81
7,NC,80
8,PA,79
9,AZ,78


In [271]:
#reference -- https://plot.ly/python/ipython-notebook-tutorial/
#embed -- <iframe width="900" height="800" frameborder="0" scrolling="no" src="https://plot.ly/~erikrood/0.embed"></iframe>

scl = [[0,"rgb(220, 220, 220)"],[0.35,"rgb(106, 137, 247)"],[0.5,"rgb(90, 120, 245)"],\
            [0.6,"rgb(70, 100, 245)"],[0.7,"rgb(40, 60, 190)"],[1,"rgb(5, 10, 172)"]]

df['text'] = df['location'] 
    
data = [dict(
    type='choropleth',
    colorscale = scl,
    autocolorscale = False,
    locations = df['location'],
    z = df['company'].astype(float),
    locationmode = 'USA-states',
    text = df['text'],
    hoverinfo = 'location+z',
    marker = dict(
        line = dict (
            color = 'rgb(255,255,255)',
            width = 2
        )
    ),
    colorbar = dict(
        title = "# of commercial drone registrations"
    )
)]

layout = dict(
    title = '# of commercial drone registrations by U.S. state',
    geo = dict(
        scope='usa',
        projection=dict( type='albers usa' ),
        showlakes = True,
        lakecolor = 'rgb(255, 255, 255)'
    )
)
    
fig = dict(data=data, layout=layout)

py.iplot(fig, validate=False, filename='drone_map')

In [96]:
DroneData.head(10)

Unnamed: 0,date,company,category,various_applications,aircraft,location,count,weighted_score
0,9/25/2014,Astraeus Aerial,Photo/Film,False,Astraeus Aerial Cinema System V.3CS UAS,CA,0,0.0
1,9/25/2014,Aerial MOB,Photo/Film,False,"HexaCrafter HC-1100, Aeronavics SkyJib 8 Heavy...",CA,0,0.0
2,9/25/2014,Pictorvision,Photo/Film,False,"PV- 14817, PVHL1, PV- 14817 PV-HL2, DJI Phanto...",CA,0,0.0
3,9/25/2014,"RC Pro Productions Consulting, dba Vortex Aerial",Photo/Film,False,"Coaxial Quad Multirotor VAO1, Coaxial Quad Mul...",CA,0,0.0
4,9/25/2014,Snaproll Media,Photo/Film,False,"Snaproll Media SUAS, Freefly Cinestar 6, Freef...",TN,0,0.0
5,9/25/2014,HeliVideo Productions,Photo/Film,False,"HVP- 14301 MultiRotor, Rotorcraft Model ERX12",TX,0,0.0
6,10/10/2014,Flying Cam,"Manufacturer, Photo/Film",False,Flying-Cam 3.0 SARAH,CA,1,0.5
7,12/10/2014,"Trimble Navigation, Ltd","Manufacturer, Agriculture",False,"Trimble UX5, Trimble UX5 hp, Trimble ZX5, Eagl...",CA,1,0.5
8,12/10/2014,Clayco,Construction,False,Skycatch Unmanned Aircraft System,IL,0,0.0
9,12/10/2014,Woolpert (I),"Manufacturer, Agriculture",False,Woolpert Altavian Nova Block III (Nova Block III),OH,1,0.5


In [211]:
df7 = DroneData[['category','company']]
df7.head(7)

Unnamed: 0,category,company
0,Photo/Film,Astraeus Aerial
1,Photo/Film,Aerial MOB
2,Photo/Film,Pictorvision
3,Photo/Film,"RC Pro Productions Consulting, dba Vortex Aerial"
4,Photo/Film,Snaproll Media
5,Photo/Film,HeliVideo Productions
6,"Manufacturer, Photo/Film",Flying Cam


In [212]:
s = df7['category'].str.split(', ').apply(pd.Series, 1).stack()

In [213]:
s.index = s.index.droplevel(-1) # to line up with df1's index
s.name = 'category' # needs a name to join

In [214]:
s

0                            Photo/Film
1                            Photo/Film
2                            Photo/Film
3                            Photo/Film
4                            Photo/Film
5                            Photo/Film
6                          Manufacturer
6                            Photo/Film
7                          Manufacturer
7                           Agriculture
8                          Construction
9                          Manufacturer
9                           Agriculture
10                         Manufacturer
10                          Agriculture
11      Utilities/Energy/Infrastructure
12                          Real Estate
13                          Agriculture
14                           Photo/Film
15                           Photo/Film
16                           Photo/Film
17                           Photo/Film
18      Utilities/Energy/Infrastructure
19                           Photo/Film
20                           Photo/Film


In [215]:
del df7['category']

In [216]:
df7.head()

Unnamed: 0,company
0,Astraeus Aerial
1,Aerial MOB
2,Pictorvision
3,"RC Pro Productions Consulting, dba Vortex Aerial"
4,Snaproll Media


In [263]:
df8 = df7.join(s)

### agnostic of company, what is the most popular category?

In [264]:
df8.head(5)

Unnamed: 0,company,category
0,Astraeus Aerial,Photo/Film
1,Aerial MOB,Photo/Film
2,Pictorvision,Photo/Film
3,"RC Pro Productions Consulting, dba Vortex Aerial",Photo/Film
4,Snaproll Media,Photo/Film


In [265]:
df4 = df8.groupby('category').count().sort_values(by='company',ascending=False).reset_index()
df4.head(5)

Unnamed: 0,category,company
0,Photo/Film,1640
1,Real Estate,1153
2,Construction,684
3,Utilities/Energy/Infrastructure,637
4,Agriculture,590


In [267]:
df10 = df4.head(10)

In [266]:
df4.columns = ['category', 'total_registrations']
df4['perc_of_total'] = ((df4['total_registrations']/df4['total_registrations'].sum()))*100
df4.head(10000)

Unnamed: 0,category,total_registrations,perc_of_total
0,Photo/Film,1640,28.596338
1,Real Estate,1153,20.104621
2,Construction,684,11.926765
3,Utilities/Energy/Infrastructure,637,11.107236
4,Agriculture,590,10.287707
5,Emergency Services,369,6.434176
6,Education,145,2.528335
7,Conservation,84,1.46469
8,Insurance,81,1.41238
9,Government Contracting,79,1.377507


In [268]:
#bar with # of registrations, line with % of total

#bar chart

x = df10['category']
y1 = df10['total_registrations']
y2 = df10['perc_of_total']


trace0 = go.Bar(
        x=x,
        y=y1,
        name='total registrations',
        marker=dict(
            color='rgb(255, 217, 102)',
            line=dict(
                color='rgb(8,48,107)',
                width=1.5
            ),
        ),
        opacity=0.6
    )

trace1 = go.Scatter(
        x=x,
        y=y2,
        name='percent of total registrations',
        yaxis = 'y2',
        marker=dict(
            color='rgb(84, 226, 129)',
            line=dict(
                color='rgb(8,48,107)',
                width=1.5
            ),
        ),
        opacity=0.6
    )
        
data = [trace0, trace1]
    
layout = go.Layout(
      title='The top 10 categories account for >95% of all commercial drone registrations',
    legend=dict(
        x=.5,
        y=1,
        bgcolor='#E2E2E2',
        bordercolor='#FFFFFF',
        borderwidth=2,
        ),
    xaxis=dict(
        title='category',
        tickangle = 47,
    ),
    
    yaxis=dict(
        title='drone registrations'
    ),
    
    yaxis2=dict(
        title='percent of total registrations',
        overlaying='y',
        side='right'
    ),
    
     margin=go.Margin(
        b = 150,
        r=50,
        t = 50
    )

    
)
fig = go.Figure(data=data, layout=layout)
py.iplot(fig, filename='drone_registrations')

### Which company has the most registrations?

In [183]:
#fixing counts here (upping by 1 across all)
df2 = DroneData[['company', 'count']]
df2['count_clean'] = (df2['count'] + 1)
del df2['count']
df2.head()
#df2[]
#df3.head()




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy



Unnamed: 0,company,count_clean
0,Astraeus Aerial,1.0
1,Aerial MOB,1.0
2,Pictorvision,1.0
3,"RC Pro Productions Consulting, dba Vortex Aerial",1.0
4,Snaproll Media,1.0


In [185]:
df3 = df2.groupby('company').mean().sort_values(by='count_clean',ascending=False).reset_index()

In [187]:
df3.head(50)

Unnamed: 0,company,count_clean
0,Terp et al LLC,7.0
1,"Texas SkyCam, LLC",7.0
2,"Above It All- UAS Services, LLC",7.0
3,"Applied Art and Technology, LLC",7.0
4,Soaring Eagle Imaging LLC,7.0
5,Marc A Menowitz,7.0
6,"Knowledge is Power, LLC",6.0
7,"KopterTek, LLC",6.0
8,"Marketshare, Inc.",6.0
9,Complete UAS LLC,6.0


In [257]:
df10.head(10)

Unnamed: 0,category,total_registrations,perc_of_total
0,Photo/Film,1640,0.285963
1,Real Estate,1153,0.201046
2,Construction,684,0.119268
3,Utilities/Energy/Infrastructure,637,0.111072
4,Agriculture,590,0.102877
5,Emergency Services,369,0.064342
6,Education,145,0.025283
7,Conservation,84,0.014647
8,Insurance,81,0.014124
9,Government Contracting,79,0.013775


In [270]:
df4.head(70)

Unnamed: 0,category,total_registrations,perc_of_total
0,Photo/Film,1640,28.596338
1,Real Estate,1153,20.104621
2,Construction,684,11.926765
3,Utilities/Energy/Infrastructure,637,11.107236
4,Agriculture,590,10.287707
5,Emergency Services,369,6.434176
6,Education,145,2.528335
7,Conservation,84,1.464690
8,Insurance,81,1.412380
9,Government Contracting,79,1.377507
