In [30]:
import pandas as pd
import numpy as np
import matplotlib as plt

In [31]:
filepath = 'logistics_ppe.csv'

ppe_data = pd.read_csv(filepath)

In [32]:
ppe_data.head()

Unnamed: 0,county,product_family,quantity_filled,shipping_zip_postal_code,as_of_date
0,Non-Governmental Entity,Surgical Masks,,92064,2020-08-12
1,Non-Governmental Entity,Hand Sanitizers,,92064,2020-08-12
2,San Diego,Test Kits,,92123,2020-08-12
3,San Diego,Coveralls (Hospitals or EMS),,92123,2020-08-12
4,San Diego,Coveralls (Hospitals or EMS),,92123,2020-08-12


In [33]:
# data cleanup
ppedf = pd.DataFrame()

ppedf = ppe_data.rename(columns = {'county':'County', 
                                      'product_family':'Product',
                                     'quantity_filled': 'Amount Fulfilled',
                                     'shipping_zip_postal_code': 'Postal',
                                     'as_of_date': 'Date'
                                     })
ppedf.head()

Unnamed: 0,County,Product,Amount Fulfilled,Postal,Date
0,Non-Governmental Entity,Surgical Masks,,92064,2020-08-12
1,Non-Governmental Entity,Hand Sanitizers,,92064,2020-08-12
2,San Diego,Test Kits,,92123,2020-08-12
3,San Diego,Coveralls (Hospitals or EMS),,92123,2020-08-12
4,San Diego,Coveralls (Hospitals or EMS),,92123,2020-08-12


In [34]:
# add up quantity to each unique product family
ppedf.drop(['Postal'], axis = 1, inplace = True)
ppedf

Unnamed: 0,County,Product,Amount Fulfilled,Date
0,Non-Governmental Entity,Surgical Masks,,2020-08-12
1,Non-Governmental Entity,Hand Sanitizers,,2020-08-12
2,San Diego,Test Kits,,2020-08-12
3,San Diego,Coveralls (Hospitals or EMS),,2020-08-12
4,San Diego,Coveralls (Hospitals or EMS),,2020-08-12
...,...,...,...,...
3399296,Unassigned,Other / None of the above,0.0,2020-09-26
3399297,Unassigned,Other / None of the above,0.0,2020-09-26
3399298,Unassigned,Other / None of the above,0.0,2020-09-26
3399299,Unassigned,Other / None of the above,0.0,2020-09-26


In [35]:
# drop rows with NA values
ppedf.dropna()

Unnamed: 0,County,Product,Amount Fulfilled,Date
25,Non-Governmental Entity,Hand Sanitizers,0.0,2020-08-12
26,Non-Governmental Entity,Hand Sanitizers,0.0,2020-08-12
28,Fresno,N-95 Respirators,17400.0,2020-08-12
29,Los Angeles,Hand Sanitizers,216.0,2020-08-12
30,Los Angeles,Surgical Masks,2000.0,2020-08-12
...,...,...,...,...
3399296,Unassigned,Other / None of the above,0.0,2020-09-26
3399297,Unassigned,Other / None of the above,0.0,2020-09-26
3399298,Unassigned,Other / None of the above,0.0,2020-09-26
3399299,Unassigned,Other / None of the above,0.0,2020-09-26


In [36]:
ppedf['Date'].value_counts()

2020-10-16    34120
2020-10-15    34085
2020-10-14    34022
2020-10-13    33984
2020-10-12    33941
              ...  
2020-06-12    15996
2020-06-11    15223
2020-06-10    14811
2020-06-09    14528
2020-06-08    14429
Name: Date, Length: 128, dtype: int64

In [37]:
# Drop rows with 0 fulfilled equipment
ppedf = ppedf[ppedf['Amount Fulfilled'] > 0]
ppedf

Unnamed: 0,County,Product,Amount Fulfilled,Date
28,Fresno,N-95 Respirators,17400.0,2020-08-12
29,Los Angeles,Hand Sanitizers,216.0,2020-08-12
30,Los Angeles,Surgical Masks,2000.0,2020-08-12
31,Los Angeles,Cloth Masks,22500.0,2020-08-12
32,Los Angeles,Face Shields (Disposable),200.0,2020-08-12
...,...,...,...,...
3398257,San Mateo,N-95 Respirators,35000.0,2020-09-26
3399269,Kern,Examination Gloves,20000.0,2020-09-26
3399270,Kern,Examination Gloves,10000.0,2020-09-26
3399271,Kern,Surgical Masks,20000.0,2020-09-26


In [38]:
ppedf.groupby(['County', 'Product', 'Date'])['Amount Fulfilled'].sum().to_frame().reset_index()

Unnamed: 0,County,Product,Date,Amount Fulfilled
0,Alameda,Cloth Masks,2020-06-08,2000.0
1,Alameda,Cloth Masks,2020-06-09,2000.0
2,Alameda,Cloth Masks,2020-06-10,2000.0
3,Alameda,Cloth Masks,2020-06-11,2000.0
4,Alameda,Cloth Masks,2020-06-12,114000.0
...,...,...,...,...
131290,Yuba,Viral Testing Media,2020-10-12,10596.0
131291,Yuba,Viral Testing Media,2020-10-13,10596.0
131292,Yuba,Viral Testing Media,2020-10-14,10596.0
131293,Yuba,Viral Testing Media,2020-10-15,10596.0


In [39]:
ppedf.head()

Unnamed: 0,County,Product,Amount Fulfilled,Date
28,Fresno,N-95 Respirators,17400.0,2020-08-12
29,Los Angeles,Hand Sanitizers,216.0,2020-08-12
30,Los Angeles,Surgical Masks,2000.0,2020-08-12
31,Los Angeles,Cloth Masks,22500.0,2020-08-12
32,Los Angeles,Face Shields (Disposable),200.0,2020-08-12


In [40]:
ppedf = ppedf.sort_values('County')

In [41]:
ppedf

Unnamed: 0,County,Product,Amount Fulfilled,Date
2991588,Alameda,KN95 Respirators,1260.0,2020-10-16
1971415,Alameda,Cloth Masks,31500.0,2020-09-06
1971414,Alameda,N-95 Respirators,960.0,2020-09-06
1224416,Alameda,Examination Gloves,50000.0,2020-09-28
2917184,Alameda,N-95 Respirators,1260.0,2020-10-02
...,...,...,...,...
3328940,,Hand Sanitizers,520.0,2020-09-02
3332743,,N-95 Respirators,20.0,2020-09-25
3356302,,Hand Sanitizers,520.0,2020-09-25
3366025,,N-95 Respirators,20.0,2020-09-26


In [42]:
ppedf.to_csv('PPE_data_clean.csv')