In [1]:
import os
import pandas as pd

from openpyxl import load_workbook
import matplotlib.pyplot as plt
import seaborn as sns

def overwrite_excel(file, new_sheet_name, df):
    """Write ``df`` into the sheet ``new_sheet_name`` of an existing workbook.

    The workbook at ``file`` is opened in append mode, so all of its other
    sheets are kept. If a sheet called ``new_sheet_name`` already exists, the
    frame is written on top of it (cells outside the frame's range are left
    untouched); otherwise a new sheet is created.

    Parameters
    ----------
    file : str or path-like
        Path to an existing ``.xlsx`` workbook. Append mode requires the file
        to exist already.
    new_sheet_name : str
        Name of the sheet to write to.
    df : pandas.DataFrame
        Data to write (index and header included, the ``to_excel`` defaults).

    Returns
    -------
    None

    Notes
    -----
    The previous implementation assigned ``writer.book`` and called
    ``writer.save()``. In pandas 2.x ``book`` is read-only and ``save()`` no
    longer exists, so that code fails there. ``if_sheet_exists='overlay'``
    needs pandas >= 1.4.
    """
    # The context manager saves and closes the workbook even if to_excel raises.
    with pd.ExcelWriter(
        file,
        engine='openpyxl',
        mode='a',
        if_sheet_exists='overlay',
    ) as writer:
        df.to_excel(writer, sheet_name=new_sheet_name)


## R.2

In [2]:
# Load the metro-level GDP table; the column names sit on the fifth line of
# the CSV (header=4). Cells equal to '(D)' are set to 0 -- presumably the
# source's marker for a withheld value (TODO confirm), so a 0 in this frame
# may mean "not published" rather than a true zero.
cali_gdp_detail = (
    pd.read_csv('Data_input/GDP_metro.csv', header=4)
    .replace('(D)', 0)
)

In [3]:
# Explicit long-title -> short-label lookup for the metro areas in the table.
# Spelling the pairs out (instead of zipping GeoName.unique() with a list of
# labels) means a change in the CSV's row order can no longer attach a label
# to the wrong metro area. Titles missing from this dict will map to NaN.
name_map = {
    'Riverside-San Bernardino-Ontario, CA (Metropolitan Statistical Area)': 'Riverside',
    'Sacramento-Roseville-Folsom, CA (Metropolitan Statistical Area)': 'Sacramento',
    'Salt Lake City, UT (Metropolitan Statistical Area)': 'Salt Lake City',
    'San Francisco-Oakland-Berkeley, CA (Metropolitan Statistical Area)': 'San Francisco',
    'San Luis Obispo-Paso Robles, CA (Metropolitan Statistical Area)': 'SLO',
}
# Kept so that any later code referring to the list of short labels still works.
short_names = list(name_map.values())
name_map

{'Riverside-San Bernardino-Ontario, CA (Metropolitan Statistical Area)': 'Riverside',
 'Sacramento-Roseville-Folsom, CA (Metropolitan Statistical Area)': 'Sacramento',
 'Salt Lake City, UT (Metropolitan Statistical Area)': 'Salt Lake City',
 'San Francisco-Oakland-Berkeley, CA (Metropolitan Statistical Area)': 'San Francisco',
 'San Luis Obispo-Paso Robles, CA (Metropolitan Statistical Area)': 'SLO'}

In [4]:
# Replace the long metro titles with their short labels and drop the two
# identifier columns that the reshaping below does not use.
cali_gdp_detail = (
    cali_gdp_detail
    .assign(GeoName=cali_gdp_detail['GeoName'].map(name_map))
    .drop(columns=['GeoFips', 'LineCode'])
)
cali_gdp_detail.head(4)

Unnamed: 0,GeoName,Description,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019
0,Riverside,All industry total,128316045,131792356,132669480,137405833,143289193,151135822,155513808,160344031,164487230,171884061
1,Riverside,Private industries,104093752,107602251,108933880,113477859,118877945,126037125,129244993,134094895,138128908,145147475
2,Riverside,"Agriculture, forestry, fishing and hunting",1271595,1475910,1137421,1098184,1478534,1523365,1541613,1504196,1443111,1563179
3,Riverside,"Mining, quarrying, and oil and gas extraction",260022,233995,295277,281389,318575,303927,199474,219098,325079,319960


### R.2.1 Melt

In [5]:
# Stack the year columns into (Date, Value) pairs: one row per
# city / industry / year. Despite its name, `cali_wide` is in long format
# at this point; the name is kept because the next cells reuse it.
cali_wide = (
    cali_gdp_detail
    .melt(id_vars=['GeoName', 'Description'], var_name='Date', value_name='Value')
    .astype({'Value': 'float'})
)
cali_wide.head(4)

Unnamed: 0,GeoName,Description,Date,Value
0,Riverside,All industry total,2010,128316045.0
1,Riverside,Private industries,2010,104093752.0
2,Riverside,"Agriculture, forestry, fishing and hunting",2010,1271595.0
3,Riverside,"Mining, quarrying, and oil and gas extraction",2010,260022.0


### R.2.2 Pivot table

In [6]:
# Spread the industries back out into columns: one row per (city, year),
# one column per Description. pivot_table's default aggregation is used.
cali_wide = pd.pivot_table(
    cali_wide,
    values='Value',
    index=['GeoName', 'Date'],
    columns='Description',
)
cali_wide.head(4)

Unnamed: 0_level_0,Description,Accommodation and food services,Administrative and support and waste management and remediation services,"Arts, entertainment, and recreation",Durable goods manufacturing,Educational services,Finance and insurance,Health care and social assistance,Management of companies and enterprises,Nondurable goods manufacturing,"Professional, scientific, and technical services",...,Wholesale trade,Private industries,All industry total,Government and government enterprises,Manufacturing and information,Natural resources and mining,Private goods-producing industries 2/,Private services-providing industries 3/,Trade,Transportation and utilities
GeoName,Date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
Riverside,2010,3916544.0,4494654.0,893191.0,6141786.0,1048056.0,3585599.0,9202562.0,856748.0,4422189.0,4087931.0,...,7337461.0,104093752.0,128316045.0,24218263.0,12838115.0,1535286.0,17296490.0,86785752.0,19674082.0,8319461.0
Riverside,2011,4078344.0,4832879.0,909594.0,6593931.0,1050261.0,3835199.0,9670406.0,946628.0,4300521.0,4171702.0,...,7757935.0,107602251.0,131792356.0,24188794.0,13146160.0,1707319.0,18429339.0,89170354.0,20087725.0,8429550.0
Riverside,2012,4198568.0,4858484.0,940823.0,6566894.0,1053035.0,3826077.0,10167089.0,933398.0,5064268.0,4131884.0,...,7770729.0,108933880.0,132669480.0,23735599.0,13838814.0,1432697.0,19141367.0,89792514.0,19791768.0,9046653.0
Riverside,2013,4240384.0,4580416.0,1017312.0,6731429.0,987468.0,3672090.0,10545266.0,999740.0,5409844.0,4158078.0,...,8787026.0,113477859.0,137405833.0,23932470.0,14597609.0,1379742.0,20163170.0,93314366.0,21325096.0,8969062.0


In [7]:
# Trim leading/trailing whitespace from every industry column label so the
# labels can be matched by their plain names later on.
cali_wide.columns = list(map(str.strip, cali_wide.columns))

In [8]:
# Short list of industry labels (not referenced by the cells shown here).
data_types = [
    'Manufacturing',
    'Finance and insurance',
    'Construction',
    'Information',
]

In [9]:
# Turn the (GeoName, Date) index levels back into ordinary columns, then
# print a compact summary of the resulting frame.
cali_wide = cali_wide.reset_index()
cali_wide.info(verbose=False)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 50 entries, 0 to 49
Columns: 36 entries, GeoName to Transportation and utilities
dtypes: float64(34), object(2)
memory usage: 14.2+ KB


In [10]:
idx = pd.IndexSlice

# Industry columns included in the growth comparison below. Every other
# column of the pivoted table (detailed sub-sectors, government, the
# "All industry total" line and the addenda aggregates) is left out.
cols = [
    'Nondurable goods manufacturing',
    'Real estate and rental and leasing',
    'Arts, entertainment, recreation, accommodation, and food services',
    'Construction',
    'Finance, insurance, real estate, rental, and leasing',
    'Information',
    'Manufacturing',
    'Mining, quarrying, and oil and gas extraction',
    'Professional and business services',
    'Retail trade',
    'Private industries',
]

In [11]:
# Group once per city and reuse the grouping for both endpoints.
# first()/last() pick each column's first/last non-null entry per group, in
# row order -- 2010 and 2019 here, assuming rows are still in Date order.
gdp_by_city = cali_wide.set_index('Date').groupby('GeoName')[cols]
gdp_first = gdp_by_city.first()
gdp_last = gdp_by_city.last()

diff = gdp_last - gdp_first   # absolute change per industry
pct = diff / gdp_first * 100  # change relative to the starting value, in %
#pct.to_csv('City_industry_change_2010_2019.csv')
pct

Unnamed: 0_level_0,Nondurable goods manufacturing,Real estate and rental and leasing,"Arts, entertainment, recreation, accommodation, and food services",Construction,"Finance, insurance, real estate, rental, and leasing",Information,Manufacturing,"Mining, quarrying, and oil and gas extraction",Professional and business services,Retail trade,Private industries
GeoName,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Riverside,10.764533,44.54969,43.608695,69.324401,37.380569,41.612437,21.416575,23.051126,32.05227,19.003505,39.439181
SLO,48.857487,41.273251,44.4013,35.394991,34.337069,181.531859,40.449858,-55.6014,43.436683,30.822105,30.256036
Sacramento,26.400553,49.686332,51.36872,71.653252,35.907058,27.562546,2.331704,0.146161,34.065605,27.837438,36.575679
Salt Lake City,-3.937224,63.563048,44.769005,61.429696,47.476365,122.01739,-2.679458,-24.222053,61.622594,40.133401,39.078046
San Francisco,16.030364,44.156166,46.375366,51.772211,38.676614,272.393701,34.703998,-46.638545,74.207414,28.037579,62.420958


### R.2.3 Pivot and melt on the original table

In [12]:
# Pivot the year-column table directly: one row per city, with the columns
# becoming a two-level (year, Description) index. Show the first two cities.
(
    cali_gdp_detail
    .pivot(index='GeoName', columns='Description')
    .head(2)
)

Unnamed: 0_level_0,2010,2010,2010,2010,2010,2010,2010,2010,2010,2010,...,2019,2019,2019,2019,2019,2019,2019,2019,2019,2019
Description,Accommodation and food services,Administrative and support and waste management and remediation services,"Arts, entertainment, and recreation",Durable goods manufacturing,Educational services,Finance and insurance,Health care and social assistance,Management of companies and enterprises,Nondurable goods manufacturing,"Professional, scientific, and technical services",...,Private industries,Addenda:,All industry total,Government and government enterprises,Manufacturing and information,Natural resources and mining,Private goods-producing industries 2/,Private services-providing industries 3/,Trade,Transportation and utilities
GeoName,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
Riverside,3916544,4494654,893191,6141786,1048056,3585599,9202562,856748,4422189,4087931,...,145147475,,171884061,26816650,15961801,1900704,23634443,121531132,27735638,12521797
SLO,490909,309337,79967,307991,37400,360360,906926,128264,397851,590073,...,14712233,,16660813,1957118,1475604,0,0,0,1593403,0


In [13]:
# Melt with default output names: the year labels land in 'variable' and the
# figures in 'value'. Show the first two rows.
(
    cali_gdp_detail
    .melt(id_vars=['GeoName', 'Description'])
    .head(2)
)

Unnamed: 0,GeoName,Description,variable,value
0,Riverside,All industry total,2010,128316045
1,Riverside,Private industries,2010,104093752
