## Transform

Transform data (extracted as 51 individual CSV files) into one large dataframe. Clean up and create unique rows and columns

In [1]:
import pandas as pd

In [2]:
pip install psycopg2

You should consider upgrading via the '/Users/AliciaLy/anaconda3/bin/python -m pip install --upgrade pip' command.[0m
Note: you may need to restart the kernel to use updated packages.


In [3]:
# Read CSV
alabama = pd.read_csv("../Data/States/Alabama.csv")

In [4]:
# Set Age (AGEP) as index
alabama = alabama.set_index("Age (AGEP)")
alabama.head()

Unnamed: 0_level_0,Total,Alabama/AL,Alaska/AK,Arizona/AZ,Arkansas/AR,California/CA,Colorado/CO,Connecticut/CT,Delaware/DE,District of Columbia/DC,...,"South America, Not Specified",Egypt,Ethiopia (2017 or later),Kenya (2017 or later),Nigeria,"Western Africa, Not Specified","Northern Africa or Other Africa, Not Specified","Eastern Africa, Not Specified",Australia,"Other US Island Areas, Oceania, Not Specified, or At Sea"
Age (AGEP),Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
-> Total,319852,253611,429,868,859,1674,983,393,64,167,...,162,36,0,12,105,0,69,103,109,128
-> Total ->,319852,253611,429,868,859,1674,983,393,64,167,...,162,36,0,12,105,0,69,103,109,128
Total 12th grade - no diploma,7237,5807,0,4,0,63,9,99,0,0,...,0,0,0,0,0,0,0,0,0,0
25 years Total 12th grade - no diploma,260,191,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
26 years Total 12th grade - no diploma,336,287,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [5]:
# Transpose to switch rows with columns
alabama_df = alabama.T

In [6]:
# Add "To_State" so that each origin state on far left shows which state they moved to on far right
# Makes data multi-dimensional

alabama_df["To_State"] = "Alabama"


In [7]:
# Code for one state
alabama = pd.read_csv("../Data/States/Alabama.csv").set_index("Age (AGEP)").T
alabama["To_State"] = "Alabama"
alabama.head()

Age (AGEP),-> Total,-> Total ->,Total 12th grade - no diploma,25 years Total 12th grade - no diploma,26 years Total 12th grade - no diploma,27 years Total 12th grade - no diploma,28 years Total 12th grade - no diploma,29 years Total 12th grade - no diploma,30 years Total 12th grade - no diploma,31 years Total 12th grade - no diploma,...,67 years Total Doctorate degree,68 years Total Doctorate degree,69 years Total Doctorate degree,70 years Total Doctorate degree,71 years Total Doctorate degree,72 years Total Doctorate degree,73 years Total Doctorate degree,74 years Total Doctorate degree,75 years Total Doctorate degree,To_State
Total,319852,319852,7237,260,336,227,132,275,265,339,...,32,2,2,31,0,4,8,10,60,Alabama
Alabama/AL,253611,253611,5807,191,287,227,125,163,178,231,...,13,2,0,31,0,4,8,0,45,Alabama
Alaska/AK,429,429,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Alabama
Arizona/AZ,868,868,4,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Alabama
Arkansas/AR,859,859,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Alabama


In [8]:
# list(alabama.columns)

In [9]:
# All states in Data folder
states_list = ["Alabama", "Alaska", "Arizona", "Arkansas", "California", "Colorado", "Connecticut", \
               "Delaware", "District of Columbia", "Florida", "Georgia", "Hawaii", "Idaho", "Illinois", \
              "Indiana", "Iowa", "Kansas", "Kentucky", "Louisiana", "Maine", "Maryland", "Massachusetts", \
              "Michigan", "Minnesota", "Mississippi", "Missouri", "Montana", "Nebraska", "Nevada", "New Hampshire", \
              "New Jersey", "New Mexico", "New York", "North Carolina", "North Dakota", "Ohio", "Oklahoma", "Oregon", \
              "Pennsylvania", "Rhode Island", "South Carolina", "South Dakota", "Tennessee", "Texas", "Utah", \
              "Vermont", "Virginia", "Washington", "West Virginia", "Wisconsin", "Wyoming"]

# Dictionary to hold state name as key, dataframe as value
dict_state = {}

# For loop to iterate through all states to shape data
for state in states_list:
    df = pd.read_csv(f'../Data/States/{state}.csv').rename(columns = {"Age (AGEP)":"Origin"}).set_index("Origin").T
    df["To_State"] = f'{state}'
    dict_state[state] = df


In [10]:
# Can call any state dataframe when using state name as dictionary key
dict_state["California"]

Origin,-> Total,-> Total ->,Total 12th grade - no diploma,25 years Total 12th grade - no diploma,26 years Total 12th grade - no diploma,27 years Total 12th grade - no diploma,28 years Total 12th grade - no diploma,29 years Total 12th grade - no diploma,30 years Total 12th grade - no diploma,31 years Total 12th grade - no diploma,...,67 years Total Doctorate degree,68 years Total Doctorate degree,69 years Total Doctorate degree,70 years Total Doctorate degree,71 years Total Doctorate degree,72 years Total Doctorate degree,73 years Total Doctorate degree,74 years Total Doctorate degree,75 years Total Doctorate degree,To_State
Total,2681950,2681950,66579,2445,2779,2252,2453,2790,2382,2088,...,336,392,619,460,364,547,286,433,201,California
Alabama/AL,2174,2174,41,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,California
Alaska/AK,2083,2083,16,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,California
Arizona/AZ,17511,17511,278,1,0,0,32,28,0,0,...,0,0,0,12,0,0,0,0,0,California
Arkansas/AR,1882,1882,69,0,0,0,0,31,0,0,...,0,0,0,0,0,0,0,0,0,California
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
"Western Africa, Not Specified",339,339,24,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,California
"Northern Africa or Other Africa, Not Specified",1068,1068,25,0,0,0,0,0,11,0,...,0,0,0,0,0,0,0,0,0,California
"Eastern Africa, Not Specified",561,561,107,0,0,66,0,41,0,0,...,0,0,0,0,0,0,0,0,0,California
Australia,3401,3401,33,0,0,0,0,0,0,12,...,0,0,0,0,0,0,0,0,0,California


In [11]:
df = pd.concat(dict_state.values(), ignore_index=False)

In [12]:
df2 = df.rename_axis("Origin").reset_index()


In [13]:
df2.set_index(["To_State", "Origin"], inplace=True)

In [14]:
df2

Unnamed: 0_level_0,Origin,-> Total,-> Total ->,Total 12th grade - no diploma,25 years Total 12th grade - no diploma,26 years Total 12th grade - no diploma,27 years Total 12th grade - no diploma,28 years Total 12th grade - no diploma,29 years Total 12th grade - no diploma,30 years Total 12th grade - no diploma,31 years Total 12th grade - no diploma,...,66 years Total Doctorate degree,67 years Total Doctorate degree,68 years Total Doctorate degree,69 years Total Doctorate degree,70 years Total Doctorate degree,71 years Total Doctorate degree,72 years Total Doctorate degree,73 years Total Doctorate degree,74 years Total Doctorate degree,75 years Total Doctorate degree
To_State,Origin,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
Alabama,Total,319852,319852,7237,260,336,227,132,275,265,339,...,44,32,2,2,31,0,4,8,10,60
Alabama,Alabama/AL,253611,253611,5807,191,287,227,125,163,178,231,...,31,13,2,0,31,0,4,8,0,45
Alabama,Alaska/AK,429,429,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Alabama,Arizona/AZ,868,868,4,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Alabama,Arkansas/AR,859,859,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Wyoming,"Western Africa, Not Specified",0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Wyoming,"Northern Africa or Other Africa, Not Specified",0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Wyoming,"Eastern Africa, Not Specified",54,54,54,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Wyoming,Australia,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [15]:
df2.rename(columns={df.columns[0]: "Total", df.columns[1]: "Total2"}, inplace=True)
df2 = df2.drop(columns=["Total2"])
df2

Unnamed: 0_level_0,Origin,Total,Total 12th grade - no diploma,25 years Total 12th grade - no diploma,26 years Total 12th grade - no diploma,27 years Total 12th grade - no diploma,28 years Total 12th grade - no diploma,29 years Total 12th grade - no diploma,30 years Total 12th grade - no diploma,31 years Total 12th grade - no diploma,32 years Total 12th grade - no diploma,...,66 years Total Doctorate degree,67 years Total Doctorate degree,68 years Total Doctorate degree,69 years Total Doctorate degree,70 years Total Doctorate degree,71 years Total Doctorate degree,72 years Total Doctorate degree,73 years Total Doctorate degree,74 years Total Doctorate degree,75 years Total Doctorate degree
To_State,Origin,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
Alabama,Total,319852,7237,260,336,227,132,275,265,339,263,...,44,32,2,2,31,0,4,8,10,60
Alabama,Alabama/AL,253611,5807,191,287,227,125,163,178,231,218,...,31,13,2,0,31,0,4,8,0,45
Alabama,Alaska/AK,429,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Alabama,Arizona/AZ,868,4,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Alabama,Arkansas/AR,859,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Wyoming,"Western Africa, Not Specified",0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Wyoming,"Northern Africa or Other Africa, Not Specified",0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Wyoming,"Eastern Africa, Not Specified",54,54,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Wyoming,Australia,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [16]:
# Keep
df2.loc["Alabama", "Total"].iloc[3:].sum()

632207

## Load

Create engine and load all_states_df into SQL database

In [21]:
# Dependencies
from login import port, pw
from sqlalchemy import create_engine
from sqlalchemy.ext.automap import automap_base

In [20]:
base = automap_base()
base.prepare(engine, reflect=True)
base.classes.keys()

NameError: name 'engine' is not defined

In [25]:
# Create engine
engine = create_engine(f'postgresql://alicia:{pw}@localhost:{port}/interstate_migration_db')
# engine = create_engine("postgresql://postgres:postgres@localhost:5432/interstate_migration_db")

# Connect to pgAdmin 4
conn = engine.connect()

In [26]:
# Load all_states_df into Postgres database
df2.to_sql(name="states", con=engine, if_exists="replace", index=True)

In [27]:
# Confirm table names
engine.table_names()

['states']

In [28]:
# Query database for all listings from states
df3 = pd.read_sql_query("SELECT * FROM states", con=engine)
df3

Unnamed: 0,To_State,Origin,Total,Total 12th grade - no diploma,25 years Total 12th grade - no diploma,26 years Total 12th grade - no diploma,27 years Total 12th grade - no diploma,28 years Total 12th grade - no diploma,29 years Total 12th grade - no diploma,30 years Total 12th grade - no diploma,...,66 years Total Doctorate degree,67 years Total Doctorate degree,68 years Total Doctorate degree,69 years Total Doctorate degree,70 years Total Doctorate degree,71 years Total Doctorate degree,72 years Total Doctorate degree,73 years Total Doctorate degree,74 years Total Doctorate degree,75 years Total Doctorate degree
0,Alabama,Total,319852,7237,260,336,227,132,275,265,...,44,32,2,2,31,0,4,8,10,60
1,Alabama,Alabama/AL,253611,5807,191,287,227,125,163,178,...,31,13,2,0,31,0,4,8,0,45
2,Alabama,Alaska/AK,429,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Alabama,Arizona/AZ,868,4,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Alabama,Arkansas/AR,859,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5554,Wyoming,"Western Africa, Not Specified",0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
5555,Wyoming,"Northern Africa or Other Africa, Not Specified",0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
5556,Wyoming,"Eastern Africa, Not Specified",54,54,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
5557,Wyoming,Australia,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [29]:
# Keep
# df2.sum(axis=1)

# Return sum of the values for destination state
# df2.sum(level="To_State")

# Return sum of the values for origin state
top_from_statedf = df2.sum(level="Origin").sort_values(by=["Total"], ascending=False).head(7)
top_from_statedf

Origin,Total,Total 12th grade - no diploma,25 years Total 12th grade - no diploma,26 years Total 12th grade - no diploma,27 years Total 12th grade - no diploma,28 years Total 12th grade - no diploma,29 years Total 12th grade - no diploma,30 years Total 12th grade - no diploma,31 years Total 12th grade - no diploma,32 years Total 12th grade - no diploma,...,66 years Total Doctorate degree,67 years Total Doctorate degree,68 years Total Doctorate degree,69 years Total Doctorate degree,70 years Total Doctorate degree,71 years Total Doctorate degree,72 years Total Doctorate degree,73 years Total Doctorate degree,74 years Total Doctorate degree,75 years Total Doctorate degree
Origin,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Total,23510597,439080,18205,16527,15412,14272,15195,15442,12454,12767,...,4048,3458,3666,3846,3445,2989,3147,2583,2952,2742
California/CA,2596049,64217,2577,2768,2111,2360,2597,2419,1966,2141,...,487,379,452,642,467,281,480,296,358,249
Texas/TX,1990045,34677,1158,1442,1130,910,1552,1554,1332,894,...,163,241,348,134,263,181,248,283,195,180
Florida/FL,1565488,35567,1590,1044,1035,932,1081,1297,965,1003,...,251,393,286,302,163,140,146,132,195,133
New York/NY,1133045,22528,1232,686,788,496,743,617,516,614,...,173,204,190,154,165,273,118,105,156,121
Illinois/IL,871122,14490,382,441,379,328,543,289,355,443,...,326,55,186,62,110,148,166,121,105,45
Ohio/OH,806189,14035,508,394,442,314,536,511,294,331,...,190,45,98,159,36,10,155,57,26,89


In [30]:
top_to_statedf = df2.sum(level="To_State").sort_values(by=["Total"], ascending=False)
top_to_statedf

Origin,Total,Total 12th grade - no diploma,25 years Total 12th grade - no diploma,26 years Total 12th grade - no diploma,27 years Total 12th grade - no diploma,28 years Total 12th grade - no diploma,29 years Total 12th grade - no diploma,30 years Total 12th grade - no diploma,31 years Total 12th grade - no diploma,32 years Total 12th grade - no diploma,...,66 years Total Doctorate degree,67 years Total Doctorate degree,68 years Total Doctorate degree,69 years Total Doctorate degree,70 years Total Doctorate degree,71 years Total Doctorate degree,72 years Total Doctorate degree,73 years Total Doctorate degree,74 years Total Doctorate degree,75 years Total Doctorate degree
To_State,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
California,5363900,133158,4890,5558,4504,4906,5580,4764,4176,4630,...,904,672,784,1238,920,728,1094,572,866,402
Texas,4306160,74866,2536,3242,2416,2248,3170,3696,2548,1954,...,410,534,670,502,424,254,378,302,520,346
Florida,3616682,79564,3174,2088,2196,1996,1990,2790,1814,1864,...,640,818,848,1020,570,474,606,566,708,458
New York,2175298,44578,1906,1656,1758,932,1518,1272,924,1222,...,432,624,358,450,184,412,290,98,296,158
Illinois,1683440,29414,684,946,798,602,1182,452,682,878,...,332,140,304,142,240,312,206,156,266,108
Ohio,1639458,28730,944,882,912,722,1106,894,556,784,...,178,52,234,98,82,20,102,76,64,286
Pennsylvania,1607672,27800,1522,808,756,1008,860,554,832,476,...,164,236,154,228,360,102,270,126,208,460
Georgia,1577760,29934,1106,1424,928,1050,900,966,992,1274,...,404,216,404,210,200,230,186,76,16,20
North Carolina,1543152,23504,670,1058,1174,1832,198,498,488,980,...,190,184,110,276,234,132,154,266,320,102
Washington,1421976,23744,1248,992,1778,1272,466,1098,572,726,...,118,148,232,346,196,170,350,92,0,62


In [26]:
# # Keep
# del_t = []
# for x in df.columns:
#     if x #contains the word total

In [27]:
# Keep
#Isolate From state (index) and to state, and 

In [28]:
# Keep
#df[index, to_state, -> Total]

In [29]:
# Keep
#group index by index and to_state and get sum of the total
#groupby dataframe, if doesn't match statelist, do not include it

In [30]:
# Keep
# Sum total values of Alabama as origin state
# df.loc["Alabama/AL", " -> Total"].iloc[3:].sum()

# New df2, but sum of values does not equal original df
df2.loc["Alabama", "Total"].iloc[3:].sum()

632207

In [31]:
# Keep

In [32]:
# Keep df.loc["Alabama/AL"]

# New df2
df2.loc["Alabama"]

Unnamed: 0_level_0,Total,Total 12th grade - no diploma,25 years Total 12th grade - no diploma,26 years Total 12th grade - no diploma,27 years Total 12th grade - no diploma,28 years Total 12th grade - no diploma,29 years Total 12th grade - no diploma,30 years Total 12th grade - no diploma,31 years Total 12th grade - no diploma,32 years Total 12th grade - no diploma,...,66 years Total Doctorate degree,67 years Total Doctorate degree,68 years Total Doctorate degree,69 years Total Doctorate degree,70 years Total Doctorate degree,71 years Total Doctorate degree,72 years Total Doctorate degree,73 years Total Doctorate degree,74 years Total Doctorate degree,75 years Total Doctorate degree
Origin,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Total,319852,7237,260,336,227,132,275,265,339,263,...,44,32,2,2,31,0,4,8,10,60
Alabama/AL,253611,5807,191,287,227,125,163,178,231,218,...,31,13,2,0,31,0,4,8,0,45
Alaska/AK,429,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Arizona/AZ,868,4,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Arkansas/AR,859,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
"Western Africa, Not Specified",0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
"Northern Africa or Other Africa, Not Specified",69,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
"Eastern Africa, Not Specified",103,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Australia,109,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [None]:
# Keep
# list(all_states_df.columns)
# drop_these = [" -> Total", " -> Total ->"]

In [None]:
# Keep
# all_states_df["Origin"]

In [None]:
# Keep
## need state to be a column, need origin to be a column
# reset index so they can be columns