In [None]:
%%bash 

# Print the shell's current working directory. Relative paths used by later
# cells (e.g. ../results/) presumably resolve against it -- verify when moving
# the notebook.
pwd

In [103]:
"""
Modified: 2019-12-01
This script pulls down the ENTRY_EXIT_191102.tsv and CLIENT_191102.tsv files
from the BIOS611 github.
Calculates the time spent in UMD shelter (days) for every stay, annotates each
stay with the client's race, gender and veteran status, and writes the result
to ../results/client_entry_exit_annotated.txt (tab separated).

"""

import pandas as pd
import numpy as np

# Download the ENTRY_EXIT_191102.tsv and place it in a Pandas DataFrame
url = 'https://raw.githubusercontent.com/biodatascience/datasci611/gh-pages/data/project2_2019/ENTRY_EXIT_191102.tsv'
df_ee = pd.read_csv(url, sep = '\t')

# Download the CLIENT_191102.tsv and place it in a Pandas DataFrame
url2 = 'https://raw.githubusercontent.com/biodatascience/datasci611/gh-pages/data/project2_2019/CLIENT_191102.tsv'
df_client = pd.read_csv(url2, sep = '\t')

# Keep only Client ID, Entry Date and Exit Date, and drop any row with a NaN
# value. The explicit copy makes the column assignments below operate on an
# independent frame rather than on a slice of the downloaded table.
df_ee = df_ee[['Client ID', 'Entry Date', 'Exit Date']].dropna(how = 'any').copy()

# Convert the entry and exit dates into pandas datetime structure.
# (infer_datetime_format is deprecated in pandas 2.x, so it is not passed.)
df_ee['entry_date'] = pd.to_datetime(df_ee['Entry Date'])
df_ee['exit_date'] = pd.to_datetime(df_ee['Exit Date'])

# Length of each stay in whole days
df_ee['time_spent'] = (df_ee['exit_date'] - df_ee['entry_date']).dt.days.astype(np.int64)

# Annotate df_ee with client race, gender and veteran status.
# The lookup must be keyed on 'Client ID': assigning a filtered df_client
# column directly would align on the row index and attach metadata from
# whichever client happens to share a row number with the stay.
# drop_duplicates guarantees one metadata row per client so the left merge
# cannot multiply stays.
client_meta = (
    df_client[['Client ID', 'Client Primary Race', 'Client Gender', 'Client Veteran Status']]
    .drop_duplicates(subset = 'Client ID')
    .rename(columns = {
        'Client Primary Race': 'client_race',
        'Client Gender': 'client_gender',
        'Client Veteran Status': 'client_vet_status',
    })
)
df_ee = df_ee.merge(client_meta, on = 'Client ID', how = 'left')

# Output text file that can later be used with downstream R analysis
df_ee.to_csv('../results/client_entry_exit_annotated.txt', sep = '\t', index = False)

In [99]:
%%writefile race_v_time-in-shelter.py

"""
Modifed: 2019-12-01
Create a violin plot of Race v. time spent at UMD Shelter

"""

import pandas as pd
import matplotlib
matplotlib.rcParams['pdf.fonttype'] = 42
matplotlib.rcParams['ps.fonttype'] = 42
import matplotlib.pyplot as plt
import seaborn as sns

# Read in client_entry_exit_annotated.txt
df = pd.read_csv('./results/client_entry_exit_annotated.txt', sep = '\t')

fig, ax = plt.subplots(figsize = (10,5))

sns.violinplot(x = 'client_race', y = 'time_spent', data = df, ax = ax)

#Update x_tick label names
labels = [name.get_text() for name in ax.get_xticklabels()]
updated_labels = [name.rstrip(' (HUD)') for name in labels]
updated_labels
ax.set_xticklabels(updated_labels)
ax.set_xticklabels(ax.get_xticklabels(),rotation = 90)

# Set the axes labels
ax.set_ylabel('Time spent at UMD Shelter (Days)', fontweight = 'bold')
ax.set_xlabel('Race', fontweight = 'bold')

# Figure formatting
sns.despine()

labels = [name.get_text() for name in ax.get_xticklabels()]
updated_labels = [name.rstrip(' (HUD)') for name in labels]
updated_labels

plt.savefig('./results/race_v_time_spent.png', bbox_inches = "tight", transparent = True, dpi  = 300)

Writing race_v_time-in-shelter.py


In [100]:
%%writefile gender_v_time-in-shelter.py

"""
Modifed: 2019-12-01
Create a violin plot of Client Gender v. time spent at UMD Shelter

"""

import pandas as pd
import matplotlib
matplotlib.rcParams['pdf.fonttype'] = 42
matplotlib.rcParams['ps.fonttype'] = 42
import matplotlib.pyplot as plt
import seaborn as sns

# Read in client_entry_exit_annotated.txt
df = pd.read_csv('./results/client_entry_exit_annotated.txt', sep = '\t')

fig, ax = plt.subplots(figsize = (7,4))

sns.violinplot(x = 'client_gender', y = 'time_spent', data = df, ax = ax)

#Update x_tick label names
labels = [name.get_text() for name in ax.get_xticklabels()]
updated_labels = [name.rstrip(' (HUD)') for name in labels]
updated_labels
ax.set_xticklabels(updated_labels)
ax.set_xticklabels(ax.get_xticklabels(),rotation = 90)

# Set the axes labels
ax.set_ylabel('Time spent at UMD Shelter (Days)', fontweight = 'bold')
ax.set_xlabel('Gender', fontweight = 'bold')

# Figure formatting
sns.despine()

labels = [name.get_text() for name in ax.get_xticklabels()]
updated_labels = [name.rstrip(' (HUD)') for name in labels]
updated_labels

plt.savefig('./results/gender_v_time_spent.png', bbox_inches = "tight", transparent = True, dpi = 300)

Writing gender_v_time-in-shelter.py


In [101]:
%%writefile gender_race_v_time-in-shelter.py

"""
Modifed: 2019-12-01
Create a violin plot of Client Gender v. time spent at UMD Shelter (Broken down by race)

"""

import pandas as pd
import matplotlib
matplotlib.rcParams['pdf.fonttype'] = 42
matplotlib.rcParams['ps.fonttype'] = 42
import matplotlib.pyplot as plt
import seaborn as sns

# Read in client_entry_exit_annotated.txt
df = pd.read_csv('./results/client_entry_exit_annotated.txt', sep = '\t')

fig, ax = plt.subplots(figsize = (15,7))

sns.violinplot(x = 'client_gender', y = 'time_spent', hue = 'client_race', data = df, ax = ax)

#Update x_tick label names
labels = [name.get_text() for name in ax.get_xticklabels()]
updated_labels = [name.rstrip(' (HUD)') for name in labels]
updated_labels
ax.set_xticklabels(updated_labels)
ax.set_xticklabels(ax.get_xticklabels(),rotation = 90)

# Set the axes labels
ax.set_ylabel('Time spent at UMD Shelter (Days)', fontweight = 'bold')
ax.set_xlabel('Gender', fontweight = 'bold')

# Figure formatting
sns.despine()

labels = [name.get_text() for name in ax.get_xticklabels()]
updated_labels = [name.rstrip(' (HUD)') for name in labels]
updated_labels

plt.savefig('./results/gender_v_time_spent_race.png', bbox_inches = "tight", transparent = True, dpi = 300)

Writing gender_race_v_time-in-shelter.py


In [102]:
%%writefile veteran_v_time-in-shelter.py

"""
Modifed: 2019-12-01
Create a violin plot of Client Veteran status v. time spent at UMD Shelter (Broken down by race)

"""

import pandas as pd
import matplotlib
matplotlib.rcParams['pdf.fonttype'] = 42
matplotlib.rcParams['ps.fonttype'] = 42
import matplotlib.pyplot as plt
import seaborn as sns

# Read in client_entry_exit_annotated.txt
df = pd.read_csv('./results/client_entry_exit_annotated.txt', sep = '\t')

fig, ax = plt.subplots(figsize = (15,7))

sns.violinplot(x = 'client_vet_status', y = 'time_spent', data = df, ax = ax)

#Update x_tick label names
labels = [name.get_text() for name in ax.get_xticklabels()]
updated_labels = [name.rstrip(' (HUD)') for name in labels]
updated_labels
ax.set_xticklabels(updated_labels)
ax.set_xticklabels(ax.get_xticklabels(),rotation = 0)

# Set the axes labels
ax.set_ylabel('Time spent at UMD Shelter (Days)', fontweight = 'bold')
ax.set_xlabel('Veteran', fontweight = 'bold')

# Figure formatting
sns.despine()

labels = [name.get_text() for name in ax.get_xticklabels()]
updated_labels = [name.rstrip(' (HUD)') for name in labels]
updated_labels

plt.savefig('./results/vet-status_v_time_spent_race.png', bbox_inches = "tight", transparent = True, dpi = 300)

Writing veteran_v_time-in-shelter.py
