# Deportation Paper


In [1]:
#Packages
import pandas as pd
import numpy as np
import pylab
from matplotlib import pyplot as plt
import statsmodels.formula.api as smf
import scipy.stats as st

%matplotlib inline

# Matplotlib customizations.
# plt.style.use("ggplot")                       # Use a different style.
plt.rcParams["figure.figsize"] = [10.0, 6.0]  # Make figures larger by default.
plt.rcParams["figure.dpi"] = 200              # Raise figure quality within the notebook.

# Pandas customizations.
pd.set_option("display.max_rows", 250)        # Raise the number of rows displayed.
pd.set_option("display.max_columns", 50)      # Raise the number of columns displayed.
pd.set_option("precision", 3)                 # Truncate floats to 3 decimals in data frames.

# font = {'size': 12}

# plt.rc('font', **font)

In [2]:
#Functions for ttest and ztest
# function for calculating the t-test for two independent samples
# Cohen's d https://researchpy.readthedocs.io/en/latest/ttest_documentation.html
def ttest(data1, data2):
    # calculate means
    mean1, mean2 = np.mean(data1), np.mean(data2)
    # calculate standard errors
    s1, s2 = np.std(data1), np.std(data2)
    n1, n2 = len(data1), len(data2)
    # standard error on the difference between the samples
    s = np.sqrt(((n1-1)*s1**2 + (n2-1)*s2**2)/(n1 + n2 - 2))
    se = s*np.sqrt((1/n1)+(1/n2))
    # calculate the t statistic
    t_stat = (mean1 - mean2)/se
    # degrees of freedom
    df = (((s1**2)/n1)+((s2**2)/n2))/((((s1**2)/n1)/(n1-1))+((((s2**2)/n2)/(n2-1))))
    # calculate the p-value
    p = (1.0 - st.t.cdf(abs(t_stat), df)) * 2.0
    # calculate cohen's d
    cohen = t_stat*np.sqrt((1/n1)+(1/n2))
#     cohen = (mean1 - mean2)/np.sqrt((((n1-1)*s1**2)+((n2-1)*s2**2))/(n1+n2-2))
    # return everything
    return t_stat, p, cohen

def ztestp(n1, t1, n2, t2):
    # calculate proportions
    p1, p2 =(n1/t1),(n2/t2)
    # calculate total prop
    p = (n1 + n2)/(t1 + t2)
    # standard error
    se = np.sqrt(p*(1-p)*((1/t1)+(1/t2)))
    # calculate the t statistic
    z_stat = (p1-p2)/se
    # calculate the p-value
    p = (1.0 - st.norm.cdf(abs(z_stat))) * 2.0
    # return everything
    return p

In [3]:
#Function for Cramer's V (effect size for chi-squared) See: https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5426219/
def cramer(chi, n, df):
# chi = test statistic, n = number of observations, df = degrees of freedom = (rows - 1)(columns - 1)
    return np.sqrt(chi/(n*df))


In [4]:
#Read in Data
dataH = pd.read_csv('Survey2Data.csv', encoding="latin-1")
dataN = pd.read_csv("Survey1Data.csv")

In [5]:
#Number in Each Sample
print(dataH.shape[0], 'in Hispanic Sample')
print(dataN.shape[0], 'in Other Sample')


1080 in Hispanic Sample
514 in Other Sample


In [6]:
#Demographic Characteristics of Survey 1
print('\tDemographic Characteristics of Other Sample')
print('\t\t\t\t', "Number\t", "Percent\t")
Ntotal = dataN.shape[0]/100
print('Gender')
print('Female\t\t\t\t', dataN[dataN['Q1'] == 'Female'].shape[0], '\t',round(dataN[dataN['Q1'] == 'Female'].shape[0]/Ntotal,1))
print('Male\t\t\t\t', dataN[dataN['Q1'] == 'Male'].shape[0], '\t',round(dataN[dataN['Q1'] == 'Male'].shape[0]/Ntotal,1))
print('\nEducation Level')
print('High School\t\t\t', dataN[dataN['Q2'] == 'High School'].shape[0], '\t',round(dataN[dataN['Q2'] == 'High School'].shape[0]/Ntotal,1))
print('Some College\t\t\t', dataN[dataN['Q2'] == 'Some College'].shape[0], '\t',round(dataN[dataN['Q2'] == 'Some College'].shape[0]/Ntotal,1))
print('Bachelor\'s Degree\t\t', dataN[dataN['Q2'] == 'Bachelor\'s Degree'].shape[0], '\t',round(dataN[dataN['Q2'] == 'Bachelor\'s Degree'].shape[0]/Ntotal,1))
print('Master\'s Degree\t\t\t', dataN[dataN['Q2'] == 'Master\'s Degree'].shape[0], '\t',round(dataN[dataN['Q2'] == 'Master\'s Degree'].shape[0]/Ntotal,1))
print('Doctoral Degree or Equivalent\t', dataN[dataN['Q2'] == 'Doctoral Degree or Equivalent'].shape[0], '\t',round(dataN[dataN['Q2'] == 'Doctoral Degree or Equivalent'].shape[0]/Ntotal,1))
print('Other\t\t\t\t', dataN[dataN['Q2'] == 'Other'].shape[0], '\t',round(dataN[dataN['Q2'] == 'Other'].shape[0]/Ntotal,1))
print('\nRace/Ethnicity')
print('African American or Black\t', dataN[dataN['Q3'] == 'African American or Black'].shape[0], '\t',round(dataN[dataN['Q3'] == 'African American or Black'].shape[0]/Ntotal,1))
print('Asian\t\t\t\t', dataN[dataN['Q3'] == 'Asian'].shape[0], '\t',round(dataN[dataN['Q3'] == 'Asian'].shape[0]/Ntotal,1))
print('Caucasian or White\t\t', dataN[dataN['Q3'] == 'Caucasian or White'].shape[0], '\t',round(dataN[dataN['Q3'] == 'Caucasian or White'].shape[0]/Ntotal,1))
print('Hispanic or Latino\t\t', dataN[dataN['Q3'] == 'Hispanic or Latino'].shape[0], '\t',round(dataN[dataN['Q3'] == 'Hispanic or Latino'].shape[0]/Ntotal,1))
print('Native American\t\t\t', dataN[dataN['Q3'] == 'Native American or American Indian'].shape[0], '\t',round(dataN[dataN['Q3'] == 'Native American or American Indian'].shape[0]/Ntotal,1))
print('Pacific Islander\t\t', dataN[dataN['Q3'] == 'Pacific Islander'].shape[0], '\t',round(dataN[dataN['Q3'] == 'Pacific Islander'].shape[0]/Ntotal,1))
print('Other\t\t\t\t', dataN[dataN['Q3'] == 'Other'].shape[0], '\t',round(dataN[dataN['Q3'] == 'Other'].shape[0]/Ntotal,1))
print('\nVote in 2016 Election')
print('Donald Trump\t\t\t', dataN[dataN['Q4'] == 'Donald Trump'].shape[0], '\t',round(dataN[dataN['Q4'] == 'Donald Trump'].shape[0]/Ntotal,1))
print('Hillary Clinton\t\t\t', dataN[dataN['Q4'] == 'Hillary Clinton'].shape[0], '\t',round(dataN[dataN['Q4'] == 'Hillary Clinton'].shape[0]/Ntotal,1))
print('Other\t\t\t\t', dataN[dataN['Q4'] == 'Other'].shape[0], '\t',round(dataN[dataN['Q4'] == 'Other'].shape[0]/Ntotal,1))
print('Did not vote\t\t\t', dataN[dataN['Q4'] == 'Did not vote'].shape[0], '\t',round(dataN[dataN['Q4'] == 'Did not vote'].shape[0]/Ntotal,1))
      
      

	Demographic Characteristics of Other Sample
				 Number	 Percent	
Gender
Female				 220 	 42.8
Male				 294 	 57.2

Education Level
High School			 50 	 9.7
Some College			 171 	 33.3
Bachelor's Degree		 209 	 40.7
Master's Degree			 66 	 12.8
Doctoral Degree or Equivalent	 17 	 3.3
Other				 1 	 0.2

Race/Ethnicity
African American or Black	 38 	 7.4
Asian				 37 	 7.2
Caucasian or White		 388 	 75.5
Hispanic or Latino		 32 	 6.2
Native American			 9 	 1.8
Pacific Islander		 2 	 0.4
Other				 8 	 1.6

Vote in 2016 Election
Donald Trump			 173 	 33.7
Hillary Clinton			 194 	 37.7
Other				 64 	 12.5
Did not vote			 83 	 16.1


In [7]:
#Demographic Characteristics of Survey 2 (Hispanic Sample)
print('\tDemographic Characteristics of Hispanic Sample')
print('\t\t\tNumber', '\tPercent')
B = dataH[dataH['BornInUS'] == 1].shape[0]
NB = dataH[dataH['BornInUS'] == 0].shape[0]
print('Birthplace')
print('Born in US\t\t', B, '\t', round((B/(B + NB))*100,1))
print('Foreign Born\t\t', NB,'\t', round((NB/(B + NB))*100,1), '\n')

M = dataH[dataH['Sex1'] == 'Male'].shape[0]
F = dataH[dataH['Sex1'] == 'Female'].shape[0]
print('Gender')
print('Male\t\t\t', M, '\t', round((M/(M + F))*100,1))
print('Female\t\t\t', F,'\t', round((F/(M + F))*100,1), '\n')

S = dataH[dataH['Language'] == 'ES'].shape[0]
E = dataH[dataH['Language'] == 'EN'].shape[0]
print('Language')
print('Spanish\t\t\t', S, '\t', round((S/(S + E))*100,1))
print('English\t\t\t', E,'\t', round((E/(S + E))*100,1), '\n')

LY = dataH[dataH['yearsInUS'] == 1].shape[0]
MY = dataH[dataH['yearsInUS'] == 2].shape[0]
HY = dataH[(dataH['yearsInUS'] == 3) | (dataH['yearsInUS'] == 4) | (dataH['yearsInUS'] == 5)].shape[0]
print('Years in US')
print('5 or fewer years\t', LY, '\t', round((LY/(LY+MY+HY))*100,1))
print('6-10 years\t\t', MY, '\t', round((MY/(LY+MY+HY))*100,1))
print('11+ years\t\t', HY, '\t', round((HY/(LY+MY+HY))*100,1), '\n')

K = dataH[dataH['knowUndocumented'] == 1].shape[0]
DK = dataH[dataH['knowUndocumented'] == 0].shape[0]
print('Know Undocumented?')
print('Undocumented\t\t', K, '\t', round((K/(K + DK))*100,1))
print('Don\'t Know\t\t', DK,'\t', round((DK/(K + DK))*100,1), '\n')

LA = dataH[(dataH['AgeOrig'] == '18-24') | (dataH['AgeOrig'] == '25-30')].shape[0]
MA = dataH[(dataH['AgeOrig'] == '31-40') | (dataH['AgeOrig'] == '41-50')].shape[0]
HA = dataH[(dataH['AgeOrig'] == '51-60') | (dataH['AgeOrig'] == '61+')].shape[0]
print('Age')
print('18-30 years old\t\t', LA, '\t', round((LA/(LA + MA + HA))*100,1))
print('31-50 years old\t\t', MA, '\t', round((MA/(LA + MA + HA))*100,1))
print('51+ years old\t\t', HA,'\t', round((HA/(LA + MA + HA))*100,1), '\n')

VT = dataH[dataH['Vote'] == 'Trump'].shape[0]
VC = dataH[dataH['Vote'] == 'Clinton'].shape[0]
VO = dataH[dataH['Vote'] == 'Other'].shape[0]
NV = dataH[dataH['Vote'] == 'Did Not Vote'].shape[0]
print('Vote in 2016 Election')
print('Donald Trump\t\t', VT, '\t', round((VT/(VT+VC+VO+NV)*100),1))
print('Hillary Clinton\t\t', VC, '\t', round((VC/(VT+VC+VO+NV)*100),1))
print('Other\t\t\t', VO, '\t', round((VO/(VT+VC+VO+NV)*100),1))
print('Did Not Vote\t\t', NV, '\t', round((NV/(VT+VC+VO+NV)*100),1))

	Demographic Characteristics of Hispanic Sample
			Number 	Percent
Birthplace
Born in US		 685 	 71.7
Foreign Born		 271 	 28.3 

Gender
Male			 452 	 42.5
Female			 612 	 57.5 

Language
Spanish			 155 	 14.4
English			 925 	 85.6 

Years in US
5 or fewer years	 94 	 8.8
6-10 years		 66 	 6.2
11+ years		 907 	 85.0 

Know Undocumented?
Undocumented		 590 	 55.4
Don't Know		 475 	 44.6 

Age
18-30 years old		 416 	 39.0
31-50 years old		 429 	 40.2
51+ years old		 222 	 20.8 

Vote in 2016 Election
Donald Trump		 201 	 18.9
Hillary Clinton		 440 	 41.3
Other			 108 	 10.1
Did Not Vote		 317 	 29.7


In [8]:
#Importance of Issue of Immigration Figure
# ax6 = dataN['Q5_3'].plot(kind = 'hist', bins = 25, label = 'Mean = ' + str(round(dataN['Q5_3'].mean(),4)))
# # ax6.set_title('Importance of the Issue of Immigration', fontsize = 14)
# ax6.set_ylabel('Frequency',fontsize = 14)
# ax6.set_facecolor('ghostwhite')
# leg6 = ax6.legend(handlelength=0, handletextpad=0, fancybox=True, fontsize = 14)
# for item in leg6.legendHandles:
#     item.set_visible(False)
# plt.savefig('importance.jpeg')

In [9]:
#Summary of Deport/Don't Deport Questions
print('In general, do you believe illegal immigrants with the following characteristics should be allowed to stay in the US, or should they be deported?')
print('\t\t\t\tShould be deported\tShould be allowed to stay in US')

D1 = dataN[dataN['I9#1_1'] == 'Should be deported'].shape[0]
ND1 = dataN[dataN['I9#1_1'] == 'Should be allowed to stay in US'].shape[0]
print('Have Children Born in the US\t', D1, round(D1/Ntotal,1), '\t\t', ND1, round(ND1/Ntotal,1),'\n')

D2 = dataN[dataN['I9#1_2'] == 'Should be deported'].shape[0]
ND2 = dataN[dataN['I9#1_2'] == 'Should be allowed to stay in US'].shape[0]
print('Have Lived in the US 5 years\t', D2, round(D2/Ntotal,1), '\t\t', ND2, round(ND2/Ntotal,1))
print('or more')

D3 = dataN[dataN['I9#1_3'] == 'Should be deported'].shape[0]
ND3 = dataN[dataN['I9#1_3'] == 'Should be allowed to stay in US'].shape[0]
print('Are currently employed in the\t', D3, round(D3/Ntotal,1), '\t\t', ND3, round(ND3/Ntotal,1))
print('US')

D4 = dataN[dataN['I9#1_4'] == 'Should be deported'].shape[0]
ND4 = dataN[dataN['I9#1_4'] == 'Should be allowed to stay in US'].shape[0]
print('Are attending college or\t', D4, round(D4/Ntotal,1), '\t\t', ND4, round(ND4/Ntotal,1))
print('advance schooling in the US')

D5 = dataN[dataN['I9#1_5'] == 'Should be deported'].shape[0]
ND5 = dataN[dataN['I9#1_5'] == 'Should be allowed to stay in US'].shape[0]
print('Came to the US as children\t', D5, round(D5/Ntotal,1), '\t\t', ND5, round(ND5/Ntotal,1),'\n')

D6 = dataN[dataN['I9#1_6'] == 'Should be deported'].shape[0]
ND6 = dataN[dataN['I9#1_6'] == 'Should be allowed to stay in US'].shape[0]
print('Are under the age of 18\t\t', D6, round(D6/Ntotal,1), '\t\t', ND6, round(ND6/Ntotal,1),'\n')

D7 = dataN[dataN['I9#1_7'] == 'Should be deported'].shape[0]
ND7 = dataN[dataN['I9#1_7'] == 'Should be allowed to stay in US'].shape[0]
print('Have been convicted of a\t', D7, round(D7/Ntotal,1), '\t\t', ND7, round(ND7/Ntotal,1))
print('serious crime in the US')

D8 = dataN[dataN['I9#1_8'] == 'Should be deported'].shape[0]
ND8 = dataN[dataN['I9#1_8'] == 'Should be allowed to stay in US'].shape[0]
print('Do not speak English\t\t', D8, round(D8/Ntotal,1), '\t\t', ND8, round(ND8/Ntotal,1))



In general, do you believe illegal immigrants with the following characteristics should be allowed to stay in the US, or should they be deported?
				Should be deported	Should be allowed to stay in US
Have Children Born in the US	 167 32.5 		 347 67.5 

Have Lived in the US 5 years	 187 36.4 		 327 63.6
or more
Are currently employed in the	 192 37.4 		 322 62.6
US
Are attending college or	 183 35.6 		 331 64.4
advance schooling in the US
Came to the US as children	 116 22.6 		 398 77.4 

Are under the age of 18		 149 29.0 		 365 71.0 

Have been convicted of a	 462 89.9 		 52 10.1
serious crime in the US
Do not speak English		 234 45.5 		 280 54.5


In [10]:
#Summary of Personal Deport/Don't Deport Questions
print('Carlos is an illegal immigrant who came to the US from Guatemala 3 months ago. He currently lives with his cousins in California where he works in construction. Carlos is 25 years old and speaks limited English.')
print('Should Carlos be deported?')
PD1 = dataN[dataN['I11'] == 'Yes'].shape[0]
PND1 = dataN[dataN['I11'] == 'No'].shape[0]
print('Yes\t\t', PD1, round(PD1/Ntotal,1))
print('No\t\t', PND1, round(PND1/Ntotal,1),'\n')

print('Enrique and his wife Carla are illegal immigrants who came to the US from Mexico with their two children, ages 6 and 8. Since coming to the US 12 years ago, Enrique and Carla have started their own restaurant and have had another child, who is a US citizen. Their older two children now attend a local community college.')
print('Should Enrique and Carla be deported?')
PD2 = dataN[dataN['I12'] == 'Yes'].shape[0]
PND2 = dataN[dataN['I12'] == 'No'].shape[0]
print('Yes\t\t', PD2, round(PD2/Ntotal,1))
print('No\t\t', PND2, round(PND2/Ntotal,1),'\n')

print('Enrique and Carla\'s oldest two children, Maria and Javier, were 6 and 8 years old when they were brought to the US 12 years ago. Both are illegal immigrants currently attending a community college in Oregon. Maria plans on becoming an engineer and Javier plans on becoming a doctor.')
print('Should Maria and Javier be deported?')
PD3 = dataN[dataN['I13'] == 'Yes'].shape[0]
PND3 = dataN[dataN['I13'] == 'No'].shape[0]
print('Yes\t\t', PD3, round(PD3/Ntotal,1))
print('No\t\t', PND3, round(PND3/Ntotal,1))



Carlos is an illegal immigrant who came to the US from Guatemala 3 months ago. He currently lives with his cousins in California where he works in construction. Carlos is 25 years old and speaks limited English.
Should Carlos be deported?
Yes		 269 52.3
No		 245 47.7 

Enrique and his wife Carla are illegal immigrants who came to the US from Mexico with their two children, ages 6 and 8. Since coming to the US 12 years ago, Enrique and Carla have started their own restaurant and have had another child, who is a US citizen. Their older two children now attend a local community college.
Should Enrique and Carla be deported?
Yes		 151 29.4
No		 363 70.6 

Enrique and Carla's oldest two children, Maria and Javier, were 6 and 8 years old when they were brought to the US 12 years ago. Both are illegal immigrants currently attending a community college in Oregon. Maria plans on becoming an engineer and Javier plans on becoming a doctor.
Should Maria and Javier be deported?
Yes		 128 24.9
No		 

In [11]:
# Holm-Bonferroni Multiple Comparisons Correction

p_values = ([[0,0.00013465816481554761], [1,0.027532402600017145], [2,0.33311159753933695], [3,0.014988844566817892], 
             [4,0.33434619121041564], [5,0.25608034719160977], [6,0.10606223602223615], [7,0.7014537596799826],
             [8,6.865705792561976e-19], [9,9.609778458806433e-22], [10,3.607052090092857e-21], [11,5.718617717236056e-21],
             [12,4.091913244118564e-15], [13,1.0570388942961983e-19],  [14,0.9050212925293228], [15,5.422703078603426e-22],
             [16,0.0066031661788276425], [17,3.9925749120539e-08], [18,1.4796391594295986e-37], [19,6.439293542825908e-15]])
p_values.sort(key=lambda tup: tup[1]) 
for index, x in enumerate(p_values):
    x.append(.05/(20 - index))
p_values.sort(key=lambda tup: tup[0])


In [12]:
#Ethnicity/Vote Table Functions

White = dataN[dataN['Q3'] == 'Caucasian or White']
NonWhite = dataN[dataN['Q3'] != 'Caucasian or White']

Trump = dataN[dataN['Q4'] == 'Donald Trump']
Clinton = dataN[dataN['Q4'] == 'Hillary Clinton']

def table2(question, question1, question2, p_range):
    significant = ''

    W = White[White[question] == 'Should be deported'].shape[0]
    NW = NonWhite[NonWhite[question] == 'Should be deported'].shape[0]
    WN = White[White[question] == 'Should be allowed to stay in US'].shape[0]
    NWN = NonWhite[NonWhite[question] == 'Should be allowed to stay in US'].shape[0]
    WT = White.shape[0]/100
    NWT = NonWhite.shape[0]/100
    EthObs1 = np.array([[W,WN],[NW,NWN]])
    obs = W + WN + NW + NWN
    g, p, dof, expctd = st.chi2_contingency(EthObs1)
    
    if p < p_values[p_range][2]:
        significant = '*'

    print(question1, '\tDeport\t\t', W, round(W/WT,1), '\t', NW, round(NW/NWT,1), '\t', p, '\t', str(round(p_values[p_range][2],8)) + significant)
    print(question2, '\tAllow to Stay\t', WN, round(WN/WT,1), '\t', NWN, round(NWN/NWT,1),'\t', '(' + str(g) + ')\t', cramer(g,obs,dof), '\n')
    
def table3(question, question1, question2, p_range):
    significant = ''
    
    T = Trump[Trump[question] == 'Should be deported'].shape[0]
    C = Clinton[Clinton[question] == 'Should be deported'].shape[0]
    TN = Trump[Trump[question] == 'Should be allowed to stay in US'].shape[0]
    CN = Clinton[Clinton[question] == 'Should be allowed to stay in US'].shape[0]
    TT = Trump.shape[0]/100
    CT = Clinton.shape[0]/100
    obs = T + CN + TN + CN
    VoteObs1 = np.array([[T,TN],[C,CN]])
    g, p, dof, expctd = st.chi2_contingency(VoteObs1)

    if p < p_values[p_range][2]:
        significant = '*'
    
    print(question1, '\tDeport\t\t', T, round(T/TT,1), '\t', C, round(C/CT,1), '\t', p, '\t', str(round(p_values[p_range][2],8)) + significant)
    print(question2, '\tAllow to Stay\t', TN, round(TN/TT,1), '\t', CN, round(CN/CT,1),'\t', '(' + str(g) + ')\t', cramer(g,obs,dof), '\n')

    

In [13]:
#Cramer's V Interpretation
#See: https://books.google.com/books?hl=en&lr=&id=rEe0BQAAQBAJ&oi=fnd&pg=PP1&ots=sv0UPsROt5&sig=ROlD_oGEODW9mSbPT7JQ4Mm7shc#v=onepage&q&f=false
print('Interpretation of Cramer\'s V (effect size)\n')
print('Degrees of freedom\t', 'Small\t', 'Medium\t', 'Large')
print('\t',1,'\t\t', .1,'\t', .3,'\t', .5)
print('\t',2,'\t\t', .07,'\t', .21,'\t', .35)
print('\t',3,'\t\t', .06,'\t', .17,'\t', .29)


Interpretation of Cramer's V (effect size)

Degrees of freedom	 Small	 Medium	 Large
	 1 		 0.1 	 0.3 	 0.5
	 2 		 0.07 	 0.21 	 0.35
	 3 		 0.06 	 0.17 	 0.29


In [14]:
#Ethnicity Tables
print('In general, do you believe illegal immigrants with the following characteristics should be allowed to stay in the US, or should they be deported?')
questions = [('I9#1_1','Have Children\t', 'Born in the US\t'),('I9#1_2','Have Lived in the', 'US 5 years or more'),('I9#1_3','Are currently employed', 'in the US\t'),('I9#1_4','Are attending college', 'in the US\t'),('I9#1_5','Came to the US\t', 'as children\t'),('I9#1_6','Are under the\t', 'age of 18\t'),('I9#1_7','Have been convicted', 'of a serious crime'),('I9#1_8','Do not speak\t', 'English\t\t')]
print('\t\t\t\t\tWhite\t\tNon-White\tChi-Squared p-value/\tHolm-Bonf. Alpha/') 
print('\t\t\t\t\t\t\t\t\t(test statistic)\t','Cramer\'s V')
for index, q in enumerate(questions):
    table2(q[0],q[1],q[2],index)



In general, do you believe illegal immigrants with the following characteristics should be allowed to stay in the US, or should they be deported?
					White		Non-White	Chi-Squared p-value/	Holm-Bonf. Alpha/
									(test statistic)	 Cramer's V
Have Children	 	Deport		 144 37.1 	 23 18.3 	 0.00013465816481554761 	 0.005*
Born in the US	 	Allow to Stay	 244 62.9 	 103 81.7 	 (14.575440710864493)	 0.1683950371671859 

Have Lived in the 	Deport		 152 39.2 	 35 27.8 	 0.027532402600017145 	 0.00714286
US 5 years or more 	Allow to Stay	 236 60.8 	 91 72.2 	 (4.857110980400878)	 0.09720921899485284 

Are currently employed 	Deport		 150 38.7 	 42 33.3 	 0.33311159753933695 	 0.0125
in the US	 	Allow to Stay	 238 61.3 	 84 66.7 	 (0.936763427769199)	 0.04269071258699264 

Are attending college 	Deport		 150 38.7 	 33 26.2 	 0.014988844566817892 	 0.00625
in the US	 	Allow to Stay	 238 61.3 	 93 73.8 	 (5.917778630999711)	 0.10729952468698971 

Came to the US	 	Deport		 92 23.7 	 24 19.0 	 0.3

In [15]:
#Vote Tables
print('In general, do you believe illegal immigrants with the following characteristics should be allowed to stay in the US, or should they be deported?')
questions = [('I9#1_1','Have Children\t', 'Born in the US\t'),('I9#1_2','Have Lived in the', 'US 5 years or more'),('I9#1_3','Are currently employed', 'in the US\t'),('I9#1_4','Are attending college', 'in the US\t'),('I9#1_5','Came to the US\t', 'as children\t'),('I9#1_6','Are under the\t', 'age of 18\t'),('I9#1_7','Have been convicted', 'of a serious crime'),('I9#1_8','Do not speak\t', 'English\t\t')]
print('\t\t\t\t\tTrump\t\tClinton\t\tChi-Squared p-value/\tHolm-Bonf. Alpha/')  
print('\t\t\t\t\t\t\t\t\t(test statistic)\tCramer\'s V')
for index, q in enumerate(questions):
    table3(q[0],q[1],q[2], index + 8)


In general, do you believe illegal immigrants with the following characteristics should be allowed to stay in the US, or should they be deported?
					Trump		Clinton		Chi-Squared p-value/	Holm-Bonf. Alpha/
									(test statistic)	Cramer's V
Have Children	 	Deport		 98 56.6 	 24 12.4 	 6.865705792561976e-19 	 0.00357143*
Born in the US	 	Allow to Stay	 75 43.4 	 170 87.6 	 (78.80201341289552)	 0.39193132382885254 

Have Lived in the 	Deport		 111 64.2 	 29 14.9 	 9.609778458806433e-22 	 0.00277778*
US 5 years or more 	Allow to Stay	 62 35.8 	 165 85.1 	 (91.79580866234659)	 0.4271962517388173 

Are currently employed 	Deport		 113 65.3 	 32 16.5 	 3.607052090092857e-21 	 0.00294118*
in the US	 	Allow to Stay	 60 34.7 	 162 83.5 	 (89.17873647148687)	 0.42359659749273043 

Are attending college 	Deport		 108 62.4 	 28 14.4 	 5.718617717236056e-21 	 0.003125*
in the US	 	Allow to Stay	 65 37.6 	 166 85.6 	 (88.26711933347893)	 0.418074604487979 

Came to the US	 	Deport		 77 44.5 	 16 8

In [16]:
#Ethnicity/Vote Table Functions2

def table4(question):
    W = White[White[question] == 'Yes'].shape[0]
    NW = NonWhite[NonWhite[question] == 'Yes'].shape[0]
    WN = White[White[question] == 'No'].shape[0]
    NWN = NonWhite[NonWhite[question] == 'No'].shape[0]
    WT = White.shape[0]/100
    NWT = NonWhite.shape[0]/100
    EthnObs2 = np.array([[W,WN],[NW,NWN]])
    g, p, dof, expctd = st.chi2_contingency(EthnObs2)
    print('\t\tWhite\t\tNon-White\tChi-Squared p-value')     
    print('Yes\t\t', W, round(W/WT,1), '\t', NW, round(NW/NWT,1), '\t', round(p,8))
    print('No\t\t', WN, round(WN/WT,1), '\t', NWN, round(NWN/NWT,1), '\n')
    
def table5(question):
    T = Trump[Trump[question] == 'Yes'].shape[0]
    C = Clinton[Clinton[question] == 'Yes'].shape[0]
    TN = Trump[Trump[question] == 'No'].shape[0]
    CN = Clinton[Clinton[question] == 'No'].shape[0]
    TT = Trump.shape[0]/100
    CT = Clinton.shape[0]/100
    VoteObs2 = np.array([[T,TN],[C,CN]])
    g, p, dof, expctd = st.chi2_contingency(VoteObs2)
    print('\t\tTrump\t\tClinton\t\tChi-Squared p-value')      
    print('Yes\t\t', T, round(T/TT,1), '\t', C, round(C/CT,1), '\t', round(p,8))
    print('No\t\t', TN, round(TN/TT,1), '\t', CN, round(CN/CT,1), '\n')

In [17]:
#Generally, all illegal immigrants should be deported by demographic characteristics
print('\t\"Generally, all illegal immigrants should be deported\" by demographic characteristics')
print('\t\t\tDeport\t\tDon\'t Deport\t\tChi-Squared p-value/\t\tHolm-Bonf. Alpha/')
print('\t\t\t\t\t\t\t\t(test statistic)\t\tCramer\'s V')
BornUS = dataH[dataH['BornInUS'] == 1]
BornForeign = dataH[dataH['BornInUS'] == 0]
Male1 = dataH[dataH['Sex1'] == 'Male']
Female1 = dataH[dataH['Sex1'] == 'Female']
Spanish = dataH[dataH['Language'] == 'ES']
English = dataH[dataH['Language'] == 'EN']
Lyears = dataH[dataH['yearsInUS'] == 1]
Myears = dataH[dataH['yearsInUS'] == 2]
Hyears = dataH[(dataH['yearsInUS'] == 3) | (dataH['yearsInUS'] == 4) | (dataH['yearsInUS'] == 5)]
know = dataH[dataH['knowUndocumented'] == 1]
NoKnow = dataH[dataH['knowUndocumented'] == 0]
Lage = dataH[(dataH['AgeOrig'] == '18-24') | (dataH['AgeOrig'] == '25-30')] 
Mage = dataH[(dataH['AgeOrig'] == '31-40') | (dataH['AgeOrig'] == '41-50')]
Hage = dataH[(dataH['AgeOrig'] == '51-60') | (dataH['AgeOrig'] == '61+')]
Trump1 = dataH[dataH['Vote'] == 'Trump']
Clinton1 = dataH[dataH['Vote'] == 'Clinton']
OtherH = dataH[dataH['Vote'] == 'Other']
NoVote1 = dataH[dataH['Vote'] == 'Did Not Vote']


M1 = Male1[Male1['deportIllegal'] == 1].shape[0]
M1N = Male1[Male1['deportIllegal'] == 0].shape[0]
M1T = Male1.dropna(subset=['deportIllegal']).shape[0]/100
F1 = Female1[Female1['deportIllegal'] == 1].shape[0]
F1N = Female1[Female1['deportIllegal'] == 0].shape[0]
F1T = Female1.dropna(subset=['deportIllegal']).shape[0]/100
GenderObs1 = np.array([[M1,M1N],[F1,F1N]])
g, p, dof, expctd = st.chi2_contingency(GenderObs1)
obs = M1 + M1N + M1T + F1 + F1N + F1T
significant = ''
if p < p_values[16][2]:
    significant = '*'
print('Male\t\t\t', M1, round(M1/M1T,1), '\t', M1N, round(M1N/M1T,1), '\t\t', p, '\t\t', str(round(p_values[16][2],8)) + significant)
print('Female\t\t\t', F1, round(F1/F1T,1), '\t', F1N, round(F1N/F1T,1),'\t\t', '(' + str(g) + ')\t\t', cramer(g, obs, dof),'\n')


LA = Lage[Lage['deportIllegal'] == 1].shape[0]
LAN = Lage[Lage['deportIllegal'] == 0].shape[0]
LAT = Lage.dropna(subset=['deportIllegal']).shape[0]/100
MA = Mage[Mage['deportIllegal'] == 1].shape[0]
MAN = Mage[Mage['deportIllegal'] == 0].shape[0]
MAT = Mage.dropna(subset=['deportIllegal']).shape[0]/100
HA = Hage[Hage['deportIllegal'] == 1].shape[0]
HAN = Hage[Hage['deportIllegal'] == 0].shape[0]
HAT = Hage.dropna(subset=['deportIllegal']).shape[0]/100
AgeObs = np.array([[LA,LAN],[MA,MAN],[HA,HAN]])
g, p, dof, expctd = st.chi2_contingency(AgeObs)
obs = LA + LAN + LAT + MA + MAN + MAT + HA + HAN + HAT
significant = ''
if p < p_values[17][2]:
    significant = '*'
print('18-30 years old\t\t', LA, round(LA/LAT,1), '\t', LAN, round(LAN/LAT,1), '\t\t', p, '\t\t', str(round(p_values[17][2],8)) + significant)
print('31-50 years old\t\t', MA, round(MA/MAT,1), '\t', MAN, round(MAN/MAT,1), '\t\t', '(' + str(g) + ')\t\t', cramer(g, obs, dof))
print('51+ years old\t\t', HA, round(HA/HAT,1), '\t', HAN, round(HAN/HAT,1), '\t\tdf:', dof, '\n')

T1 = Trump1[Trump1['deportIllegal'] == 1].shape[0]
C1 = Clinton1[Clinton1['deportIllegal'] == 1].shape[0]
OH = OtherH[OtherH['deportIllegal'] == 1].shape[0]
NV1 = NoVote1[NoVote1['deportIllegal'] == 1].shape[0]
T1N = Trump1[Trump1['deportIllegal'] == 0].shape[0]
C1N = Clinton1[Clinton1['deportIllegal'] == 0].shape[0]
OHN = OtherH[OtherH['deportIllegal'] == 0].shape[0]
NV1N = NoVote1[NoVote1['deportIllegal'] == 0].shape[0]
T1T = Trump1.dropna(subset=['deportIllegal']).shape[0]/100
C1T = Clinton1.dropna(subset=['deportIllegal']).shape[0]/100
OHT = OtherH.dropna(subset=['deportIllegal']).shape[0]/100
NV1T = NoVote1.dropna(subset=['deportIllegal']).shape[0]/100
VoteObs1 = np.array([[T1,T1N],[C1,C1N],[OH,OHN],[NV1,NV1N]])
g, p, dof, expctd = st.chi2_contingency(VoteObs1)
obs = T1 + C1 + OH + NV1 + T1N + C1N + OHN + NV1N + T1T + C1T + OHT + NV1T
significant = ''
if p < p_values[18][2]:
    significant = '*'
print('Trump\t\t\t', T1, round(T1/T1T,1), '\t', T1N, round(T1N/T1T,1), '\t\t', p, '\t', str(round(p_values[18][2],8)) + significant)
print('Clinton\t\t\t', C1, round(C1/C1T,1), '\t', C1N, round(C1N/C1T,1), '\t\t', '(' + str(g) + ')\t\t', cramer(g,obs,dof))
print('Other\t\t\t', OH, round(OH/OHT,1), '\t', OHN, round(OHN/OHT,1), '\t\tdf:', dof)
print('Did Not Vote\t\t', NV1, round(NV1/NV1T,1), '\t', NV1N, round(NV1N/NV1T,1), '\n')
Total = dataH[dataH['deportIllegal'] == 1].shape[0]
TotalN = dataH[dataH['deportIllegal'] == 0].shape[0] 
TotalT = dataH.dropna(subset=['deportIllegal']).shape[0]/100
print('Total\t\t\t', Total, round(Total/TotalT,1), '\t', TotalN, round(TotalN/TotalT,1), '\t\t', TotalT*100)



	"Generally, all illegal immigrants should be deported" by demographic characteristics
			Deport		Don't Deport		Chi-Squared p-value/		Holm-Bonf. Alpha/
								(test statistic)		Cramer's V
Male			 173 39.1 	 270 60.9 		 0.0066031661788276425 		 0.00555556
Female			 185 30.8 	 416 69.2 		 (7.377860934508783)		 0.08364775629154389 

18-30 years old		 108 26.3 	 302 73.7 		 3.9925749120539e-08 		 0.00454545*
31-50 years old		 142 33.9 	 277 66.1 		 (34.072488759653794)		 0.12692667487769857
51+ years old		 108 49.5 	 110 50.5 		df: 2 

Trump			 145 74.0 	 51 26.0 		 1.4796391594295986e-37 	 0.0025*
Clinton			 93 21.3 	 343 78.7 		 (174.32843423262727)		 0.2345292194804173
Other			 33 30.8 	 74 69.2 		df: 3
Did Not Vote		 88 28.7 	 219 71.3 

Total			 360 34.3 	 689 65.7 		 1049.0


In [18]:
# Cohen's d (effect size for difference in means), 
#See: http://onlinestatbook.com/2/effect_size/two_means.html#:~:text=Effect%20Size%3A%20Difference%20Between%20Two%20Means&text=When%20the%20units%20of%20a,interpretable%20measure%20of%20effect%20size.
#"effect size is measured in terms of the number of standard deviations the means differ by"

In [19]:
#Compare two samples on discrimination question
#Caucasian or White, Hispanic or Latino, Asian, African American or Black, Native American or American Indian, Other
t_stat, p, cohen = ttest(dataH.dropna(subset=['TrumpDiscrimination'])['TrumpDiscrimination'],dataN[dataN['Q3'] != 'Hispanic or Latino']['I14_1'])
print('Since the campaign and election of Donald Trump, do you feel levels of discrimination towards Latinos/Hispanics have increased?')
print('Hispanic Sample Mean:', round(dataH.dropna(subset=['TrumpDiscrimination'])['TrumpDiscrimination'].mean(),4))
print('Non-Hispanic Sample Mean:',round(dataN[dataN['Q3'] != 'Hispanic or Latino']['I14_1'].mean(),4))
print('Difference-in-means p value:', p)
print('Difference-in-means test statistic:', t_stat)
print('Holm-Bonferroni Corrected Alpha:', str(p_values[19][2])+'*')
print('Cohen\'s d:', cohen)


Since the campaign and election of Donald Trump, do you feel levels of discrimination towards Latinos/Hispanics have increased?
Hispanic Sample Mean: 5.257
Non-Hispanic Sample Mean: 4.3849
Difference-in-means p value: 6.439293542825908e-15
Difference-in-means test statistic: 8.01646337173131
Holm-Bonferroni Corrected Alpha: 0.004166666666666667*
Cohen's d: 0.44323192486357693
