# 1033 Data 
### Summary
 - Download raw set
 - Group set by State, Item Name, Ship Date, and NSN (National Stock Number)
 - Derive PSC (Product and Suppy Codes) to categorize items.
 - Download PSC Manual to relate descriptions to PSC codes

In [None]:
import pandas as pd
#program_df = pd.read_csv('https://query.data.world/s/jblkhnme7uibgq4golet263yn5jcvj')


In [None]:
#program_df.head()

In [None]:
# Create a new DF grouping by State, Item Name, Ship Date, NSN.
# Get sum of Quantity
summary_df = program_df.groupby(by = ['Year','State','State_Ab','County', 'Item Name', 'Ship Date','NSN'])['Quantity'].sum()
summary_df = pd.DataFrame(summary_df)
summary_df.reset_index(inplace= True)
#summary_df.head()

In [None]:
# Having a mental block...I'm sure there is a more elegant way to add the sum of total cost to the DF above.  But this works.
# Make a new DF and merge it into summary_df on the non-aggregated attributes.
merge_can = program_df.groupby(by= ['Year','State','State_Ab','County', 'Item Name', 'Ship Date','NSN'])['Total.Cost'].sum()
merge_can = pd.DataFrame(merge_can)
merge_can.head()
merge_can.reset_index(inplace = True)
summary_df = summary_df.merge(merge_can, how= "inner", on = ['Year','State','State_Ab','County', 'Item Name', 'Ship Date','NSN'])
# summary_df.head()

In [None]:
# Per "Federal Procurement Data System" manual, 
# the PSC code can be used to categorize the Item Names and are the first 4 char of the NSN
# Get the PSC code from the NSN
summary_df['psc'] = summary_df['NSN'].str.slice(start=0, stop=4, step=None)

In [None]:
summary_df["psc"]=pd.to_numeric(summary_df["psc"], errors='coerce')


In [None]:
summary_df.head()


## Enhance Data with PSC Manual Data

### Steps:
 - Load PSC Codes
 - Load Group Codes
 - Merge PSC and Group data
 - Merge that set into Summary data
 

In [None]:
psc_df = pd.read_csv("psc_codes.csv")
#psc_df.head()

In [None]:
group_df = pd.read_csv("group_codes.csv" )
#group_df.head()

In [None]:
# use an inner join.  How many records drop?
# psc to numeric on both psc_df and summary_df.  There is an issue merging otherwise.
psc_df = psc_df.merge(group_df, how= 'inner', on = "group")
psc_df["psc"]=pd.to_numeric(psc_df["psc"], errors='coerce')


In [None]:
summary_df = summary_df.merge(psc_df, how = 'inner', on = 'psc')

In [None]:
summary_df.head()

In [None]:
geo_df = pd.read_csv("geo.csv")
geo_df.head()

In [None]:
summary_df = summary_df.merge(geo_df, how= "left", on = ['State_Ab','County'])

In [None]:
summary_df.head()
summary_df.to_csv("summary.csv")

## Plot Findings

 - Finding 1
 - Finding 2

In [42]:
import pandas as pd
summary_df = pd.read_csv("summary.csv")
summary_df

Unnamed: 0.1,Unnamed: 0,Year,State,State_Ab,County,Item Name,Ship Date,NSN,Quantity,Total.Cost,psc,group,psc_desc,psc_category,group_desc,group_note,Lat,Lon
0,0,2006,ALABAMA,AL,AUTAUGA,"RIFLE,5.56 MILLIMETER",11/15/2006,1005-00-073-9421,4,1996.00,1005,10,Guns,through 30mm,Weapons,This group includes combat weapons as well as ...,32.450328,-86.721712
1,1,2006,ALABAMA,AL,BALDWIN,"PISTOL,CALIBER .45,AUTOMATIC",11/15/2006,1005-00-726-5655,24,1409.04,1005,10,Guns,through 30mm,Weapons,This group includes combat weapons as well as ...,30.763492,-87.756371
2,2,2006,ALABAMA,AL,BALDWIN,"RIFLE,5.56 MILLIMETER",11/15/2006,1005-00-073-9421,2,998.00,1005,10,Guns,through 30mm,Weapons,This group includes combat weapons as well as ...,30.763492,-87.756371
3,3,2006,ALABAMA,AL,BALDWIN,"RIFLE,5.56 MILLIMETER",4/15/2006,1005-00-073-9421,4,1996.00,1005,10,Guns,through 30mm,Weapons,This group includes combat weapons as well as ...,30.763492,-87.756371
4,4,2006,ALABAMA,AL,BALDWIN,"RIFLE,7.62 MILLIMETER",11/15/2006,1005-00-589-1271,10,1380.00,1005,10,Guns,through 30mm,Weapons,This group includes combat weapons as well as ...,30.763492,-87.756371
5,5,2006,ALABAMA,AL,BARBOUR,"RIFLE,5.56 MILLIMETER",11/15/2006,1005-00-073-9421,10,4990.00,1005,10,Guns,through 30mm,Weapons,This group includes combat weapons as well as ...,31.805322,-85.421299
6,6,2006,ALABAMA,AL,BIBB,"RIFLE,5.56 MILLIMETER",11/15/2006,1005-00-073-9421,8,3992.00,1005,10,Guns,through 30mm,Weapons,This group includes combat weapons as well as ...,32.989499,-87.220392
7,7,2006,ALABAMA,AL,BLOUNT,"RIFLE,5.56 MILLIMETER",11/15/2006,1005-00-073-9421,2,998.00,1005,10,Guns,through 30mm,Weapons,This group includes combat weapons as well as ...,33.922162,-86.449262
8,8,2006,ALABAMA,AL,BLOUNT,"RIFLE,7.62 MILLIMETER",11/15/2006,1005-00-589-1271,1,138.00,1005,10,Guns,through 30mm,Weapons,This group includes combat weapons as well as ...,33.922162,-86.449262
9,9,2006,ALABAMA,AL,BLOUNT,"RIFLE,7.62 MILLIMETER",5/15/2006,1005-00-589-1271,2,276.00,1005,10,Guns,through 30mm,Weapons,This group includes combat weapons as well as ...,33.922162,-86.449262


In [43]:
from config import api_key, username
py.sign_in(username, api_key)
init_notebook_mode(connected=True)



In [44]:
from plotly import __version__
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
plotly.tools.set_credentials_file(username='mansplanish', api_key='BkaNgNAvB72omUPLQVOJ')



In [45]:
# Plot Quantity of Guns, Ammunition, and other Weapons over time
# Limit dataframe to these categories

military_df =  summary_df[summary_df['group'].isin(['10', '13'])]



In [46]:
military_df

Unnamed: 0.1,Unnamed: 0,Year,State,State_Ab,County,Item Name,Ship Date,NSN,Quantity,Total.Cost,psc,group,psc_desc,psc_category,group_desc,group_note,Lat,Lon
0,0,2006,ALABAMA,AL,AUTAUGA,"RIFLE,5.56 MILLIMETER",11/15/2006,1005-00-073-9421,4,1996.00,1005,10,Guns,through 30mm,Weapons,This group includes combat weapons as well as ...,32.450328,-86.721712
1,1,2006,ALABAMA,AL,BALDWIN,"PISTOL,CALIBER .45,AUTOMATIC",11/15/2006,1005-00-726-5655,24,1409.04,1005,10,Guns,through 30mm,Weapons,This group includes combat weapons as well as ...,30.763492,-87.756371
2,2,2006,ALABAMA,AL,BALDWIN,"RIFLE,5.56 MILLIMETER",11/15/2006,1005-00-073-9421,2,998.00,1005,10,Guns,through 30mm,Weapons,This group includes combat weapons as well as ...,30.763492,-87.756371
3,3,2006,ALABAMA,AL,BALDWIN,"RIFLE,5.56 MILLIMETER",4/15/2006,1005-00-073-9421,4,1996.00,1005,10,Guns,through 30mm,Weapons,This group includes combat weapons as well as ...,30.763492,-87.756371
4,4,2006,ALABAMA,AL,BALDWIN,"RIFLE,7.62 MILLIMETER",11/15/2006,1005-00-589-1271,10,1380.00,1005,10,Guns,through 30mm,Weapons,This group includes combat weapons as well as ...,30.763492,-87.756371
5,5,2006,ALABAMA,AL,BARBOUR,"RIFLE,5.56 MILLIMETER",11/15/2006,1005-00-073-9421,10,4990.00,1005,10,Guns,through 30mm,Weapons,This group includes combat weapons as well as ...,31.805322,-85.421299
6,6,2006,ALABAMA,AL,BIBB,"RIFLE,5.56 MILLIMETER",11/15/2006,1005-00-073-9421,8,3992.00,1005,10,Guns,through 30mm,Weapons,This group includes combat weapons as well as ...,32.989499,-87.220392
7,7,2006,ALABAMA,AL,BLOUNT,"RIFLE,5.56 MILLIMETER",11/15/2006,1005-00-073-9421,2,998.00,1005,10,Guns,through 30mm,Weapons,This group includes combat weapons as well as ...,33.922162,-86.449262
8,8,2006,ALABAMA,AL,BLOUNT,"RIFLE,7.62 MILLIMETER",11/15/2006,1005-00-589-1271,1,138.00,1005,10,Guns,through 30mm,Weapons,This group includes combat weapons as well as ...,33.922162,-86.449262
9,9,2006,ALABAMA,AL,BLOUNT,"RIFLE,7.62 MILLIMETER",5/15/2006,1005-00-589-1271,2,276.00,1005,10,Guns,through 30mm,Weapons,This group includes combat weapons as well as ...,33.922162,-86.449262


In [156]:
# Update colors andd add lables on rollover.

import plotly.graph_objs as go

trace1 = go.Bar(x=military_df['Year'], y=military_df['Quantity'], name='Quantity', )
trace2 = go.Bar(x=military_df['Year'], y=military_df['Total.Cost'], name='Total Cost')


data = [trace1, trace2]
plot(data)
py.iplot(data, filename='barchart')


In [117]:
#print(len(x_data) -1)
military_df['psc_desc'].unique()

array(['Guns',
       'Surface Use Explosive Ordnance Disposal Tools and Equipment',
       'Camouflage and Deception Equipment', 'Demolition Materials',
       'Miscellaneous Weapons Includes', 'Land Mines', 'Tactical Sets',
       'Specialized Ammunition Handling and Servicing Equipment',
       'Ammunition',
       'Assemblies Interchangeable Between Weapons in Two or More Classes'],
      dtype=object)

In [159]:
# Add color, tweak the alpha
# Add lables


y = military_df["Quantity"]
q = military_df["Total.Cost"]/5000

trace0 = go.Scatter(
    x= x,
    y= y,
    mode='markers',
    marker=dict(
        size=q,
    )
)

data2 = [trace0]
plot(data2)
py.iplot(data2, filename='bubblechart-size')
