# 1033 Data 
### Summary
 - Download raw set
 - Group set by State, Item Name, Ship Date, and NSN (National Stock Number)
 - Derive PSC (Product and Service Codes) to categorize items.
 - Download PSC Manual to relate descriptions to PSC codes

In [1]:
import pandas as pd
# Load the raw 1033 data set directly from its data.world query URL.
program_df = pd.read_csv(
    filepath_or_buffer='https://query.data.world/s/jblkhnme7uibgq4golet263yn5jcvj'
)

In [2]:
# Build the summary table: one row per (State, Item Name, Ship Date, NSN)
# with Quantity and Total.Cost each summed over the rows in that group.
# Both measures are aggregated in a single groupby pass, so there is no need
# for a second groupby followed by a merge back on the grouping keys.
summary_df = (
    program_df
    .groupby(by=['State', 'Item Name', 'Ship Date', 'NSN'])[['Quantity', 'Total.Cost']]
    .sum()
    # Turn the grouping keys back into ordinary columns.
    .reset_index()
)

In [4]:
# The "Federal Procurement Data System" manual states that the PSC code,
# which categorizes an item, is the first 4 characters of its NSN.
# Take that leading 4-character prefix of every NSN as the psc column.
summary_df['psc'] = summary_df['NSN'].str[:4]

In [5]:
# Cast the psc prefix from text to a number; any prefix that cannot be
# parsed becomes NaN (errors='coerce') instead of raising.
summary_df["psc"] = pd.to_numeric(
    summary_df["psc"],
    errors='coerce',
)


In [6]:
# Show the first five rows to check the newly derived psc column.
summary_df.head(n=5)

Unnamed: 0,State,Item Name,Ship Date,NSN,Quantity,Total.Cost,psc
0,ALABAMA,1 4 DRIVE SOCKET SET,4/15/2012,5120-00-WRE-NSET,1,151.24,5120
1,ALABAMA,10 5 BLK JUNGLE BOOT,4/15/2012,8430-01-B00-T,1,85.0,8430
2,ALABAMA,11 5 DESERT BOOT,4/15/2012,8430-01-BOO-T,1,125.0,8430
3,ALABAMA,11 GALLON SHARPS CONTAINER RE,2/15/2012,6530-CO-NTA-INER,1,142.0,6530
4,ALABAMA,12 INCH SPEAKER S,4/15/2012,5835-00-012-SPEA,1,800.0,5835


## Enhance Data with PSC Manual Data

### Steps:
 - Load PSC Codes
 - Load Group Codes
 - Merge PSC and Group data
 - Merge that set into Summary data
 

In [8]:
# Load the PSC code lookup table from the local CSV file.
psc_df = pd.read_csv(filepath_or_buffer="psc_codes.csv")

In [9]:
# Load the group code lookup table from the local CSV file.
group_df = pd.read_csv(filepath_or_buffer="group_codes.csv")

In [10]:
# Attach the group columns to each PSC row. This is an inner join on "group",
# so PSC rows without a matching group are dropped -- how many records drop?
psc_df = pd.merge(psc_df, group_df, how='inner', on="group")
# psc is made numeric here and on summary_df as well; there is an issue
# merging the two on psc otherwise.
psc_df["psc"] = pd.to_numeric(
    psc_df["psc"],
    errors='coerce',
)


In [15]:
# Attach the PSC and group description columns to every summary row, joining
# on the numeric psc key.
# pandas matches null merge keys against each other, so summary rows whose psc
# could not be parsed (NaN after errors='coerce') are dropped first; otherwise
# they would pair with any NaN-psc rows in psc_df and yield spurious rows
# instead of being excluded by the inner join.
summary_df = summary_df.dropna(subset=['psc']).merge(psc_df, how='inner', on='psc')

In [16]:
# Show the first five rows of the merged table.
summary_df.head(n=5)

Unnamed: 0,State,Item Name,Ship Date,NSN,Quantity,Total.Cost,psc,group,psc_desc,psc_category,group_desc,group_note
0,ALABAMA,1 4 DRIVE SOCKET SET,4/15/2012,5120-00-WRE-NSET,1,151.24,5120,51,Hand Tools,Nonedged,Hand Tools,
1,ALABAMA,3 8 DRIVE SOCKET SET,4/15/2012,5120-00-WRE-NSET,1,250.0,5120,51,Hand Tools,Nonedged,Hand Tools,
2,ALABAMA,ADAPTER SOCKET WREN,4/15/2012,5120-01-335-2021,1,62.16,5120,51,Hand Tools,Nonedged,Hand Tools,
3,ALABAMA,ADAPTER SOCKET WREN,4/15/2012,5120-01-431-2957,2,8.36,5120,51,Hand Tools,Nonedged,Hand Tools,
4,ALABAMA,"ADAPTER,SOCKET WREN",4/15/2012,5120-00-227-8088,3,23.97,5120,51,Hand Tools,Nonedged,Hand Tools,


In [17]:
## Voila -- the merged dataset is ready for some plotting work; save it to disk.
# NOTE(review): the row index is written as an unnamed first column by
# default; confirm whether downstream readers expect it.
summary_df.to_csv(path_or_buf="summary_df.csv")