In [1]:
import pandas as pd
import datetime
import numpy as np
import mysql.connector
import json
import csv
import os
import matplotlib as plt
import matplotlib.pyplot as pyplot
import dataframe_image as dfi
import requests

### Downloading the most recent sales data directly from the Census Bureau website

In [2]:
# 1. Define the URL of the Census Bureau workbook.
url = "https://www.census.gov/retail/mrts/www/mrtssales92-present.xlsx"

# 2. Download the data behind the URL.
#    The timeout stops the cell from hanging forever on a stalled connection, and
#    raise_for_status() aborts on an HTTP error so an error page is never saved
#    to disk as if it were the workbook.
response = requests.get(url, timeout=60)
response.raise_for_status()

# 3. Write the response body to a local file called mrtssales92-present.xlsx.
#    The context manager closes (and flushes) the file even if the write fails.
with open("mrtssales92-present.xlsx", "wb") as xlsx_file:
    bytes_written = xlsx_file.write(response.content)

# Show the number of bytes written as the cell output.
bytes_written

440409

### Reading the MRTS Data and Data Preparation

In [3]:
xlsx = pd.ExcelFile("mrtssales92-present.xlsx")

# One combined (labels + monthly figures) frame per sheet, in processing order.
yearly_frames = []

# Sheets are walked in reverse workbook order.
for sheet in reversed(xlsx.sheet_names):

    # Capture the 65 business-type labels from column B of the sheet.
    categories = pd.read_excel(xlsx, sheet_name=sheet, header=3, usecols="B").iloc[2:67]

    # The 2022 sheet is read over a narrower column range (C:J) than the
    # other sheets, which are read over C:N.
    month_cols = "C:J" if sheet == '2022' else "C:N"

    # The 65 rows of sales values that line up with the labels above.
    sales_figs = pd.read_excel(xlsx, sheet_name=sheet, header=4, usecols=month_cols).iloc[1:66]

    # Put the labels side by side with their sales figures. Both indexes are
    # reset so the rows align by position rather than by spreadsheet row number.
    reshaped_df = pd.concat(
        [categories.reset_index(drop=True), sales_figs.reset_index(drop=True)],
        axis=1,
    )
    yearly_frames.append(reshaped_df)

# Join all sheets column-wise in a single concat instead of re-copying a growing
# frame on every iteration.
sales_df = pd.concat(yearly_frames, axis=1)

In [4]:
# Sanity check: (rows, columns) of the combined wide table.
sales_df.shape

(65, 399)

In [5]:
# Preview the first rows of the combined wide table.
sales_df.head()

Unnamed: 0,Kind of Business,Jan. 1992,Feb. 1992,Mar. 1992,Apr. 1992,May 1992,Jun. 1992,Jul. 1992,Aug. 1992,Sep. 1992,...,Dec. 2021,Kind of Business.1,Jan. 2022,Feb. 2022,Mar. 2022,Apr. 2022,May 2022,Jun. 2022,Jul. 2022,Aug. 2022
0,"Retail and food services sales, total",146376,147079,159336,163669,170068,168663,169890,170364,164617,...,711673,"Retail and food services sales, total",587961,579139,681854,681357,705001,697935,690050,702729
1,Retail sales and food services excl motor vehi...,116565,115862,124200,127587,133608,130274,132076,134928,128734,...,589369,Retail sales and food services excl motor vehi...,474414,462148,541746,544061,572984,565376,562385,566838
2,Retail sales and food services excl gasoline s...,134277,135499,147064,151226,156619,155236,156016,156539,151477,...,659844,Retail sales and food services excl gasoline s...,538863,529564,617457,616581,632118,622594,615971,634752
3,Retail sales and food services excl motor vehi...,104466,104282,111928,115144,120159,116847,118202,121103,115594,...,537540,Retail sales and food services excl motor vehi...,425316,412573,477349,479285,500101,490035,488306,498861
4,"Retail sales, total",130683,131244,142488,147175,152420,151849,152586,152476,148158,...,632849,"Retail sales, total",518146,506400,597077,594426,614474,609933,600746,613416


In [6]:
# Remove every column labelled 'Kind of Business' so only the monthly figures remain.
sales_df = sales_df.drop('Kind of Business', axis=1)
sales_df.head()

Unnamed: 0,Jan. 1992,Feb. 1992,Mar. 1992,Apr. 1992,May 1992,Jun. 1992,Jul. 1992,Aug. 1992,Sep. 1992,Oct. 1992,...,Nov. 2021,Dec. 2021,Jan. 2022,Feb. 2022,Mar. 2022,Apr. 2022,May 2022,Jun. 2022,Jul. 2022,Aug. 2022
0,146376,147079,159336,163669,170068,168663,169890,170364,164617,173655,...,654469,711673,587961,579139,681854,681357,705001,697935,690050,702729
1,116565,115862,124200,127587,133608,130274,132076,134928,128734,136917,...,538632,589369,474414,462148,541746,544061,572984,565376,562385,566838
2,134277,135499,147064,151226,156619,155236,156016,156539,151477,159992,...,602186,659844,538863,529564,617457,616581,632118,622594,615971,634752
3,104466,104282,111928,115144,120159,116847,118202,121103,115594,123254,...,486349,537540,425316,412573,477349,479285,500101,490035,488306,498861
4,130683,131244,142488,147175,152420,151849,152586,152476,148158,155987,...,579687,632849,518146,506400,597077,594426,614474,609933,600746,613416


In [7]:
# Flip the table so that months become rows and the row positions become columns.
transposed_df = sales_df.T
transposed_df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,55,56,57,58,59,60,61,62,63,64
Jan. 1992,146376,116565,134277,104466,130683,100872,33906,29811,26788,25800,...,658,371,6860,2692,1916,15693,1049,13325,6887,6438
Feb. 1992,147079,115862,135499,104282,131244,100027,35220,31217,28203,27031,...,798,402,6059,2285,1627,15835,1026,13474,6937,6537
Mar. 1992,159336,124200,147064,111928,142488,107352,38731,35136,31684,30195,...,714,419,6297,2484,1537,16848,1074,14346,7245,7101
Apr. 1992,163669,127587,151226,115144,147175,111093,40548,36082,32547,30583,...,801,393,6022,2506,1341,16494,1031,14065,7000,7065
May 1992,170068,133608,156619,120159,152420,115960,42213,36460,32883,31095,...,900,435,5803,2483,1163,17648,1047,15077,7532,7545


In [8]:
# Normalise the business-type labels so they can be used as column names:
# abbreviate the long prefix to "RSFS", strip parentheses, periods, commas and
# apostrophes, and turn hyphens into spaces.
# The patterns are raw strings so that \( \) and \. reach the regex engine as
# written instead of being treated as (invalid) Python string escapes.
business_names = (
    categories['Kind of Business']
    .str.replace("Retail sales and food services", "RSFS", regex=True)
    .str.replace(r"\(|\)|\.|,|'", "", regex=True)
    .str.replace("-", " ", regex=True)
)
categories['Kind of Business'] = business_names

# Use the cleaned labels as the column names of the month-by-business table.
transposed_df.columns = categories['Kind of Business'].to_list()

# Cells holding the "(S)" or "(NA)" markers become NaN, then every missing value
# is replaced by 0.
# NOTE(review): zero-filling makes the later pct_change() produce -1 or inf around
# these cells - confirm that treating them as zero sales is intended.
transposed_df = transposed_df.replace(to_replace=r"\(S\)|\(NA\)", value=np.nan, regex=True)
transposed_df = transposed_df.where(pd.notnull(transposed_df), 0)

transposed_df.head()

Unnamed: 0,Retail and food services sales total,RSFS excl motor vehicle and parts,RSFS excl gasoline stations,RSFS excl motor vehicle and parts and gasoline stations,Retail sales total,Retail sales total excl motor vehicle and parts dealers,GAFO1,Motor vehicle and parts dealers,Automobile and other motor vehicle dealers,Automobile dealers,...,Gift novelty and souvenir stores,Used merchandise stores,Nonstore retailers,Electronic shopping and mail order houses,Fuel dealers,Food services and drinking places,Drinking places,Restaurants and other eating places,Full service restaurants,Limited service eating places
Jan. 1992,146376.0,116565.0,134277.0,104466.0,130683.0,100872.0,33906.0,29811.0,26788.0,25800.0,...,658.0,371.0,6860.0,2692.0,1916.0,15693.0,1049.0,13325.0,6887.0,6438.0
Feb. 1992,147079.0,115862.0,135499.0,104282.0,131244.0,100027.0,35220.0,31217.0,28203.0,27031.0,...,798.0,402.0,6059.0,2285.0,1627.0,15835.0,1026.0,13474.0,6937.0,6537.0
Mar. 1992,159336.0,124200.0,147064.0,111928.0,142488.0,107352.0,38731.0,35136.0,31684.0,30195.0,...,714.0,419.0,6297.0,2484.0,1537.0,16848.0,1074.0,14346.0,7245.0,7101.0
Apr. 1992,163669.0,127587.0,151226.0,115144.0,147175.0,111093.0,40548.0,36082.0,32547.0,30583.0,...,801.0,393.0,6022.0,2506.0,1341.0,16494.0,1031.0,14065.0,7000.0,7065.0
May 1992,170068.0,133608.0,156619.0,120159.0,152420.0,115960.0,42213.0,36460.0,32883.0,31095.0,...,900.0,435.0,5803.0,2483.0,1163.0,17648.0,1047.0,15077.0,7532.0,7545.0


In [9]:
# Reshape from wide (one column per business type) to long (one row per
# month/business pair), keeping the month labels as the index.
melted_sales_df = transposed_df.melt(
    var_name="Kind of Business",
    value_name="Sales",
    ignore_index=False,
)
melted_sales_df.head(10)

Unnamed: 0,Kind of Business,Sales
Jan. 1992,Retail and food services sales total,146376.0
Feb. 1992,Retail and food services sales total,147079.0
Mar. 1992,Retail and food services sales total,159336.0
Apr. 1992,Retail and food services sales total,163669.0
May 1992,Retail and food services sales total,170068.0
Jun. 1992,Retail and food services sales total,168663.0
Jul. 1992,Retail and food services sales total,169890.0
Aug. 1992,Retail and food services sales total,170364.0
Sep. 1992,Retail and food services sales total,164617.0
Oct. 1992,Retail and food services sales total,173655.0


In [10]:
# Move the month labels out of the index into an ordinary column named 'index'.
melted_sales_df = melted_sales_df.reset_index()
melted_sales_df.head()

Unnamed: 0,index,Kind of Business,Sales
0,Jan. 1992,Retail and food services sales total,146376.0
1,Feb. 1992,Retail and food services sales total,147079.0
2,Mar. 1992,Retail and food services sales total,159336.0
3,Apr. 1992,Retail and food services sales total,163669.0
4,May 1992,Retail and food services sales total,170068.0


In [11]:
# Give the former index column a meaningful name.
melted_sales_df = melted_sales_df.rename(columns={'index': 'Month'})
melted_sales_df.head()

Unnamed: 0,Month,Kind of Business,Sales
0,Jan. 1992,Retail and food services sales total,146376.0
1,Feb. 1992,Retail and food services sales total,147079.0
2,Mar. 1992,Retail and food services sales total,159336.0
3,Apr. 1992,Retail and food services sales total,163669.0
4,May 1992,Retail and food services sales total,170068.0


In [12]:
# Convert the month labels in 'Month' into proper datetime values.
melted_sales_df = melted_sales_df.assign(
    Month=lambda frame: pd.to_datetime(frame['Month'])
)
melted_sales_df.head()

Unnamed: 0,Month,Kind of Business,Sales
0,1992-01-01,Retail and food services sales total,146376.0
1,1992-02-01,Retail and food services sales total,147079.0
2,1992-03-01,Retail and food services sales total,159336.0
3,1992-04-01,Retail and food services sales total,163669.0
4,1992-05-01,Retail and food services sales total,170068.0


In [13]:
# Sanity check: (rows, columns) of the long-format sales table.
melted_sales_df.shape

(23920, 3)

### Exploring Percentage Change

In [14]:
# Fractional change of every business type relative to the previous row (the
# previous month), so the first row has no value to compare against.
percent_changes = transposed_df.pct_change()

percent_changes.head()

Unnamed: 0,Retail and food services sales total,RSFS excl motor vehicle and parts,RSFS excl gasoline stations,RSFS excl motor vehicle and parts and gasoline stations,Retail sales total,Retail sales total excl motor vehicle and parts dealers,GAFO1,Motor vehicle and parts dealers,Automobile and other motor vehicle dealers,Automobile dealers,...,Gift novelty and souvenir stores,Used merchandise stores,Nonstore retailers,Electronic shopping and mail order houses,Fuel dealers,Food services and drinking places,Drinking places,Restaurants and other eating places,Full service restaurants,Limited service eating places
Jan. 1992,,,,,,,,,,,...,,,,,,,,,,
Feb. 1992,0.004803,-0.006031,0.009101,-0.001761,0.004293,-0.008377,0.038754,0.047164,0.052822,0.047713,...,0.212766,0.083558,-0.116764,-0.151189,-0.150835,0.009049,-0.021926,0.011182,0.00726,0.015377
Mar. 1992,0.083336,0.071965,0.085351,0.07332,0.085672,0.07323,0.099688,0.125541,0.123427,0.117051,...,-0.105263,0.042289,0.03928,0.08709,-0.055317,0.063972,0.046784,0.064717,0.0444,0.086278
Apr. 1992,0.027194,0.027271,0.028301,0.028733,0.032894,0.034848,0.046913,0.026924,0.027238,0.01285,...,0.121849,-0.062053,-0.043672,0.008857,-0.127521,-0.021011,-0.040037,-0.019587,-0.033816,-0.00507
May 1992,0.039097,0.047191,0.035662,0.043554,0.035638,0.04381,0.041062,0.010476,0.010324,0.016741,...,0.123596,0.10687,-0.036367,-0.009178,-0.132737,0.069965,0.015519,0.071952,0.076,0.067941


In [15]:
# Reshape the percent changes from wide to long, one row per month/business
# pair, keeping the month labels as the index.
melted_pc_df = percent_changes.melt(
    var_name="Kind of Business",
    value_name="Percent Change",
    ignore_index=False,
)
melted_pc_df.head(10)

Unnamed: 0,Kind of Business,Percent Change
Jan. 1992,Retail and food services sales total,
Feb. 1992,Retail and food services sales total,0.004803
Mar. 1992,Retail and food services sales total,0.083336
Apr. 1992,Retail and food services sales total,0.027194
May 1992,Retail and food services sales total,0.039097
Jun. 1992,Retail and food services sales total,-0.008261
Jul. 1992,Retail and food services sales total,0.007275
Aug. 1992,Retail and food services sales total,0.00279
Sep. 1992,Retail and food services sales total,-0.033734
Oct. 1992,Retail and food services sales total,0.054903


In [16]:
# Move the month labels out of the index into an ordinary column named 'index'.
melted_pc_df = melted_pc_df.reset_index()
melted_pc_df.head()

Unnamed: 0,index,Kind of Business,Percent Change
0,Jan. 1992,Retail and food services sales total,
1,Feb. 1992,Retail and food services sales total,0.004803
2,Mar. 1992,Retail and food services sales total,0.083336
3,Apr. 1992,Retail and food services sales total,0.027194
4,May 1992,Retail and food services sales total,0.039097


In [17]:
# Give the former index column a meaningful name.
melted_pc_df = melted_pc_df.rename(columns={'index': 'Month'})
melted_pc_df.head()

Unnamed: 0,Month,Kind of Business,Percent Change
0,Jan. 1992,Retail and food services sales total,
1,Feb. 1992,Retail and food services sales total,0.004803
2,Mar. 1992,Retail and food services sales total,0.083336
3,Apr. 1992,Retail and food services sales total,0.027194
4,May 1992,Retail and food services sales total,0.039097


In [18]:
# Convert the month labels in 'Month' into proper datetime values.
melted_pc_df = melted_pc_df.assign(
    Month=lambda frame: pd.to_datetime(frame['Month'])
)
melted_pc_df.head()

Unnamed: 0,Month,Kind of Business,Percent Change
0,1992-01-01,Retail and food services sales total,
1,1992-02-01,Retail and food services sales total,0.004803
2,1992-03-01,Retail and food services sales total,0.083336
3,1992-04-01,Retail and food services sales total,0.027194
4,1992-05-01,Retail and food services sales total,0.039097


In [19]:
# Sanity check: (rows, columns) of the long-format percent-change table.
melted_pc_df.shape

(23920, 3)

In [20]:
# Attach each month/business pair's percent change to its sales figure.
# A left join keeps every row of the sales table.
biz_sales = melted_sales_df.merge(
    melted_pc_df,
    on=['Month', 'Kind of Business'],
    how='left',
)
biz_sales.head()

Unnamed: 0,Month,Kind of Business,Sales,Percent Change
0,1992-01-01,Retail and food services sales total,146376.0,
1,1992-02-01,Retail and food services sales total,147079.0,0.004803
2,1992-03-01,Retail and food services sales total,159336.0,0.083336
3,1992-04-01,Retail and food services sales total,163669.0,0.027194
4,1992-05-01,Retail and food services sales total,170068.0,0.039097


In [21]:
# Sanity check: (rows, columns) of the merged table.
biz_sales.shape

(23920, 4)

In [22]:
# Make sure the output folder exists first: to_csv does not create missing
# directories, and nothing earlier in the notebook creates 'data/'.
os.makedirs('data', exist_ok=True)

# Write the merged table (index included) to CSV.
biz_sales.to_csv('data/mrtssales92-present.csv')

## Reference

US Census Bureau: Monthly Retail Trade. https://www.census.gov/retail/index.html.