# Transform

Clean up Census data downloaded from the [IPUMS online data analysis system](https://usa.ipums.org/usa/sda/).

In [2]:
import calculate
import pandas as pd

## Combine files we downloaded and process Hispanic totals

In [67]:
def total_latino(row):
    """
    Add up the four Hispanic-origin counts carried by a single row.

    `row` must expose `mexican`, `puerto_rican`, `cuban` and `other` as
    attributes; the return value is their sum.
    """
    origin_fields = ("mexican", "puerto_rican", "cuban", "other")
    return sum(getattr(row, field) for field in origin_fields)

In [68]:
def prep_hispanics(name, prefix="hispanics_la"):
    """
    Load one downloaded CSV, derive the Latino totals and save the result.

    Reads "ipums/input/{prefix}_{name}.csv", labels its seven columns and adds:

    * year: the `name` argument, unchanged
    * ind1990_code: the text before the first ":" in `ind1990`
    * latino: the value returned by `total_latino` for the row
    * latino_percent: `calculate.percentage(latino, total)` for the row

    The frame is then indexed by `ind1990_code`, written to
    "ipums/output/{prefix}_{name}_transformed.csv" and returned.

    Args:
        name: Suffix of the input file name; also stored in the `year` column.
        prefix: Leading part of both the input and output file names.

    Returns:
        The transformed DataFrame, indexed by `ind1990_code`.
    """
    df = pd.read_csv(
        "ipums/input/{}_{}.csv".format(prefix, name),
        # NOTE(review): with `names` given and no `header` argument, pandas
        # reads the first line of the file as data -- confirm the downloads
        # carry no header row.
        names=[
            'ind1990',
            'non_hispanic',
            'mexican',
            'puerto_rican',
            'cuban',
            'other',
            'total'
        ]
    )
    df['year'] = name
    # Vectorized split: keep whatever precedes the first ":" in the label.
    # Unlike a row-wise lambda, a missing label becomes NaN instead of raising.
    df['ind1990_code'] = df['ind1990'].str.split(":", n=1).str[0]
    df['latino'] = df.apply(total_latino, axis=1)
    # calculate.percentage works on scalars, so it is applied row by row.
    df['latino_percent'] = df.apply(lambda x: calculate.percentage(x.latino, x.total), axis=1)
    df = df.set_index("ind1990_code")
    df.to_csv("ipums/output/{}_{}_transformed.csv".format(prefix, name))
    return df

In [69]:
def trim_hispanics(df, code='60'):
    """
    Keep only the rows for one index value and the summary columns.

    Args:
        df: DataFrame whose index holds the codes to filter on and which has
            `year`, `non_hispanic`, `latino`, `total` and `latino_percent`
            columns.
        code: Index value to keep. Defaults to '60', the value this notebook
            uses for its construction figures.

    Returns:
        A new DataFrame with a fresh 0..n-1 index containing only the five
        summary columns for the matching rows (empty if nothing matches).
    """
    summary_columns = ['year', 'non_hispanic', 'latino', 'total', 'latino_percent']
    # reset_index() moves the code index into a regular column, which the
    # column selection then drops.
    return df[df.index == code].reset_index()[summary_columns]

Combine the yearly files and output the number of Hispanics working in construction in LA County.

In [71]:
# Prepare one frame per downloaded year, in chronological order.
# Each prep_hispanics call also writes its own transformed CSV.
(
    hispanic_80_la,
    hispanic_90_la,
    hispanic_00_la,
    hispanic_05_la,
    hispanic_10_la,
    hispanic_15_la,
) = [
    prep_hispanics(year)
    for year in ("1980", "1990", "2000", "2005", "2010", "2015")
]

In [72]:
# Trim every yearly frame, stack the results in chronological order,
# and key the combined table by year.
construction_hispanics_la = pd.concat(
    [
        trim_hispanics(yearly_frame)
        for yearly_frame in (
            hispanic_80_la,
            hispanic_90_la,
            hispanic_00_la,
            hispanic_05_la,
            hispanic_10_la,
            hispanic_15_la,
        )
    ]
).set_index("year")

In [74]:
# Save the combined table; its `year` index is written as the first column.
construction_hispanics_la.to_csv(
    path_or_buf="ipums/output/hispanics_la_combined.csv"
)