In [None]:
import pandas as pd
import re

# !python --version    #Python 3.8.5
# pd.__version__       #1.1.2
# re.__version__       #2.2.1
#  datetime standard module

In [None]:
# useful functions
def make_dataframe(a_dict: dict) -> pd.DataFrame:
    """ Return dataframe of all values in dictionary."""
    a_df = pd.concat([pd.concat([v],ignore_index=True) for k,v in a_dict.items()],ignore_index=True).\
                    apply(lambda x: x.str.strip() if x.dtype == 'object' else x)
    a_df.index = a_df['Ship Date']
    a_df.index = a_df.index.normalize()
    a_df.index.name = 'Date'
    return a_df

In [None]:
# Get the quarterly cumulative LESO Transferred Property data file from the
#     Defense Logistics Agency Law Enforcement Support Office Public Information
# The original name of the data file should be in the form:
#      DISP_AllStatesAndTerritories_mmddyyyy.xlsx
# Enter the local file name for each quarter to compare. The names below are
# relative, so the workbooks are looked up in the current working directory.
LESO_Q1_file = "DISP_AllStatesAndTerritories_03312020.xlsx"
LESO_Q2_file = "DISP_AllStatesAndTerritories_06302020.xlsx"
# Placeholders for the remaining 2020 quarters; uncomment to include them.
#LESO_Q3_file = "DISP_AllStatesAndTerritories_09302020.xlsx"
#LESO_Q4_file = "DISP_AllStatesAndTerritories_12312020.xlsx"

### Prepare data

In [None]:
# Read every worksheet of each quarterly workbook. sheet_name=None makes
# read_excel return a dictionary keyed by sheet name (the states), with each
# sheet's contents as a dataframe.
all_sheets = dict(sheet_name=None)
q1_dict = pd.read_excel(LESO_Q1_file, **all_sheets)
q2_dict = pd.read_excel(LESO_Q2_file, **all_sheets)
#q3_dict = pd.read_excel(LESO_Q3_file, **all_sheets)
#q4_dict = pd.read_excel(LESO_Q4_file, **all_sheets)

In [None]:
# Flatten each quarter's per-sheet dataframes into one date-indexed dataframe.
q1_df = make_dataframe(q1_dict)
q2_df = make_dataframe(q2_dict)
#q3_df = make_dataframe(q3_dict)
#q4_df = make_dataframe(q4_dict)

# Register each loaded quarter exactly once. The summary below is driven by
# this mapping, so enabling another quarter cannot accidentally report the
# figures of a different quarter.
quarter_frames = {
    1: q1_df,
    2: q2_df,
    #3: q3_df,
    #4: q4_df,
}

# Shapes noted in the original notebook: Quarter 1 (141068, 11), Quarter 2 (138813, 11)
for quarter, frame in quarter_frames.items():
    print(f'Q{quarter} shape:', frame.shape)

# Earliest and latest ship date present in each quarterly file.
for quarter, frame in quarter_frames.items():
    print(f'Quarter {quarter} has dates between',
          frame['Ship Date'].min(), 'and', frame['Ship Date'].max())

### Analyze data

In [None]:
# Count records per calendar year of ship date for each quarterly file.
# Each yearly bin is labelled with that year's 31 December. An offset object
# is used instead of the 'Y' alias string because newer pandas releases
# deprecate that alias (in favour of 'YE'), while the object form works in
# both old and new versions.
YEAR_END = pd.offsets.YearEnd()
q1_count_state = q1_df.groupby(pd.Grouper(freq=YEAR_END))['State'].count()
q2_count_state = q2_df.groupby(pd.Grouper(freq=YEAR_END))['State'].count()
#q3_count_state = q3_df.groupby(pd.Grouper(freq=YEAR_END))['State'].count()
#q4_count_state = q4_df.groupby(pd.Grouper(freq=YEAR_END))['State'].count()

# date_range only emits year-end stamps that fall on or before `end`. The 2020
# bin is labelled 2020-12-31, so `end` must reach that date; an earlier end
# (e.g. 2020-06-30) stops at 2019-12-31 and silently drops all 2020 shipments.
yearIndex = pd.date_range(start='1980-01-01', end='2020-12-31', freq=YEAR_END)

In [None]:
# One column of yearly record counts per quarterly file. Passing
# index=yearIndex aligns every series to the same year-end labels; years a
# file has no records for become NaN, and labels outside yearIndex are dropped.
quarter_counts = {
    'Q1_count': q1_count_state,
    'Q2_count': q2_count_state,
    #'Q3_count': q3_count_state,
    #'Q4_count': q4_count_state
}
year_df = pd.DataFrame(quarter_counts, index=yearIndex)

In [None]:
# Sanity check: rows = number of year-end labels in yearIndex,
# columns = one count column per quarterly file included above.
year_df.shape

In [None]:
# Grouped bar chart: one cluster per year-end label, one bar per quarterly file.
ax = year_df.plot.bar(rot=90, figsize=(16, 8))
# Label the figure so it can be understood without reading the code above.
ax.set_title('LESO transferred property records by ship year')
ax.set_xlabel('Ship year (year-end label)')
ax.set_ylabel('Number of records');

In [None]:
# Year-by-year difference in record counts between consecutive quarterly files
start_year = '1990-01-01'
end_year = '2020-03-31'
# Label-based slice on the date index; both endpoints are inclusive.
year_window = year_df.loc[start_year:end_year]
compare_df = pd.DataFrame(
    {
    'Q1-Q2': year_window['Q1_count'] - year_window['Q2_count'],
    #'Q2-Q3': year_window['Q2_count'] - year_window['Q3_count'],
    #'Q3-Q4': year_window['Q3_count'] - year_window['Q4_count'],
    })
compare_df