# Veteran Status Script
Run every code cell below, from top to bottom, to create the VETERAN_STATUS data table.

### Import Statements

In [3]:
import pandas as pd
import warnings
warnings.filterwarnings('ignore')

### Function to create row for each year

In [4]:
def _parse_percentage(value):
    """Return '1.2%'-style text as a fraction (0.012); return None for anything else."""
    # Blank CSV cells are read by pandas as float NaN, so check the type
    # before using `in`, which would raise TypeError on a float.
    if isinstance(value, str) and '%' in value:
        return float(value.rstrip('%')) / 100
    return None


def veteran(file, year):
    """Build the veteran-status summary for one year's CSV file.

    Parameters
    ----------
    file : str or path-like
        CSV passed to ``pd.read_csv``. It must have an ``Estimate`` column
        whose first row is the total civilian 18+ population written as a
        count (commas allowed, e.g. "63,924") and whose percentage rows
        contain a ``%`` sign (e.g. "1.2%").
    year : int
        Value stored in the ``year`` column of every returned row.

    Returns
    -------
    pandas.DataFrame
        Columns ``year``, ``vets`` and ``total_civ_pop_18_plus``. There is one
        row for each source row that holds a percentage estimate and has no
        missing value in any column; the source row index is preserved.
        ``vets`` is percentage * total and is not rounded.

    Raises
    ------
    FileNotFoundError
        If ``file`` does not exist (raised by ``pd.read_csv``).
    IndexError
        If the file has no data rows.
    ValueError
        If the first row's ``Estimate`` is not an integer count.
    """
    raw = pd.read_csv(file)

    # The first data row carries the total population; strip thousands separators.
    total_civ_pop = int(str(raw.iloc[0]['Estimate']).replace(',', ''))

    # Percentage rows become fractions, every other row becomes missing.
    # Dropping incomplete rows then removes the total row (no percentage)
    # along with any row that has a blank cell in another column.
    # The Label text itself is never used: the output carries `year` instead.
    complete_rows = (
        raw
        .assign(Percentage=raw['Estimate'].apply(_parse_percentage))
        .dropna()
    )

    # Build the output columns directly under their final snake_case names.
    summary = complete_rows.assign(
        year=year,
        vets=complete_rows['Percentage'] * total_civ_pop,
        total_civ_pop_18_plus=total_civ_pop,
    )

    return summary[['year', 'vets', 'total_civ_pop_18_plus']]

### Test that function works

In [5]:
# Spot-check a single year; the returned frame is displayed as the cell output
veteran(file='data/2015/VETERAN_STATUS.CSV', year=2015)

Unnamed: 0,year,vets,total_civ_pop_18_plus
1,2015,767.088,63924


### Test that function works for each year
Should print only "No file for year '2020'"

In [6]:
# Run the function for every year in the range to confirm each source file
# can be processed. The results are discarded here.
for year in range(2010, 2023):
    file = f'data/{year}/VETERAN_STATUS.CSV'

    try:
        veteran(file, year)
    except FileNotFoundError:
        # Only a missing file is reported. Any other failure (unexpected
        # columns, unparsable values, a typo in the function) now raises
        # instead of being mislabelled as "No file".
        print(f"No file for year '{year}'")

No file for year '2020'


### Run function for each year

In [7]:
# One frame per year, newest first. 2020 is skipped on purpose.
# The generator yields results in the same order as the names on the left.
(
    VETERAN_STATUS_2022,
    VETERAN_STATUS_2021,
    VETERAN_STATUS_2019,
    VETERAN_STATUS_2018,
    VETERAN_STATUS_2017,
    VETERAN_STATUS_2016,
    VETERAN_STATUS_2015,
    VETERAN_STATUS_2014,
    VETERAN_STATUS_2013,
    VETERAN_STATUS_2012,
    VETERAN_STATUS_2011,
    VETERAN_STATUS_2010,
) = (
    veteran(f'data/{yr}/VETERAN_STATUS.CSV', yr)
    for yr in (2022, 2021, 2019, 2018, 2017, 2016, 2015, 2014, 2013, 2012, 2011, 2010)
)

### Combine each year into single dataframe containing all years

In [8]:
# Stack the per-year frames, newest first. Each frame keeps its own row label,
# so the combined index contains repeated values at this point.
# NOTE(review): the saved output of this cell shows no rows for 2010 or 2011
# even though both frames are included here - confirm veteran() yields data
# for those years.
VETERAN_STATUS = pd.concat(
    [
        VETERAN_STATUS_2022,
        VETERAN_STATUS_2021,
        VETERAN_STATUS_2019,
        VETERAN_STATUS_2018,
        VETERAN_STATUS_2017,
        VETERAN_STATUS_2016,
        VETERAN_STATUS_2015,
        VETERAN_STATUS_2014,
        VETERAN_STATUS_2013,
        VETERAN_STATUS_2012,
        VETERAN_STATUS_2011,
        VETERAN_STATUS_2010,
    ]
)
VETERAN_STATUS

Unnamed: 0,year,vets,total_civ_pop_18_plus
1,2022,1350.463,71077
1,2021,1900.125,70375
1,2019,1571.328,71424
1,2018,986.874,70491
1,2017,987.9,65860
1,2016,1036.881,60993
1,2015,767.088,63924
1,2014,1344.399,64019
1,2013,717.53,65230
1,2012,1476.528,61522


### Reset index on combined dataframe

In [9]:
# Swap the repeated per-year row labels for a fresh 0..n-1 index.
# drop=True discards the old labels instead of adding them as an 'index' column.
VETERAN_STATUS.reset_index(drop=True, inplace=True)
VETERAN_STATUS

Unnamed: 0,year,vets,total_civ_pop_18_plus
0,2022,1350.463,71077
1,2021,1900.125,70375
2,2019,1571.328,71424
3,2018,986.874,70491
4,2017,987.9,65860
5,2016,1036.881,60993
6,2015,767.088,63924
7,2014,1344.399,64019
8,2013,717.53,65230
9,2012,1476.528,61522


### Save final dataframe as CSV to 'final' folder in 'data' folder

In [10]:
# Write the combined table without the row index so only the data columns are stored
VETERAN_STATUS.to_csv(path_or_buf='data/final/VETERAN_STATUS.CSV', index=False)

### Check that dataframe was saved properly as CSV

In [11]:
# Read the file back from disk and display it to confirm the round trip
df = pd.read_csv(filepath_or_buffer='data/final/VETERAN_STATUS.CSV')
df

Unnamed: 0,year,vets,total_civ_pop_18_plus
0,2022,1350.463,71077
1,2021,1900.125,70375
2,2019,1571.328,71424
3,2018,986.874,70491
4,2017,987.9,65860
5,2016,1036.881,60993
6,2015,767.088,63924
7,2014,1344.399,64019
8,2013,717.53,65230
9,2012,1476.528,61522
