# Discharge table (outcomes)

In [None]:
# (Re)load IPython's autoreload extension and switch it to mode 2, so edits to
# imported modules such as helper_functions are picked up without a kernel restart.
# Comments stay on their own lines: a line magic takes the rest of its line as arguments.
%reload_ext autoreload
%autoreload 2

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import pprint
import missingno as msno
import os
import seaborn as sns

from helper_functions import open_table_list_columns, groupby_percent, groupby_plotsize, derive_discharge_vars, create_serial_epi

# Location argument handed to the table-loading helpers, read from the DATADIR
# environment variable (presumably the directory holding the data tables).
# NOTE(review): os.getenv returns None when the variable is unset — confirm the
# helpers cope with that, or make sure DATADIR is exported before running.
DATADIR = os.getenv('DATADIR')

### read in data, derive vars and count things

In [None]:
# Load the 'Discharge' table through the project helper, passing DATADIR as its location.
# NOTE(review): judging by its name the helper presumably also lists the columns — confirm in helper_functions.
discharge = open_table_list_columns(DATADIR, 'Discharge')

In [None]:
# Apply the two project helpers in sequence: create_serial_epi first, then
# derive_discharge_vars (which also receives DATADIR). Each result is copied,
# so `discharge` ends up as an independent frame.
discharge = derive_discharge_vars(
    create_serial_epi(discharge).copy(),
    DATADIR,
).copy()

In [None]:
# Compare the number of discharge rows with the number of distinct clients (Serial).
# Both counts are computed once and reused for every figure printed below.
n_discharges = discharge.shape[0]
n_clients = discharge.Serial.nunique()

print("There are {} rows in discharge and {} unique clients (Serial).\nTherefore it is {} that rows uniquely identify clients".format(
    n_discharges, n_clients, n_discharges == n_clients))

# Guard the ratio so an empty table reports nan instead of raising ZeroDivisionError.
discharges_per_client = n_discharges / n_clients if n_clients else float('nan')
print("On average there are {} discharges per client".format(discharges_per_client))

### Missing

In [None]:
# First batch of discharge columns whose missingness is visualised.
missingness_columns = [
    'DischargeID',
    'Serial',
    'Episode',
    'EventID',
    'DischargeDate',
    'DateStarted',
    'LetterToGP',
    'LetterToClient',
    'ModalityStatusCode',
    'ReasonForClosure',
    'Agency',
    'YPSelfHarmExit',
    'YPUnsafeDrugExit',
    'YPOffendingExit',
    'YPCAFExit',
    'YPSaferSexExit',
    'YPSexualHealthExit',
    'YPRegisteredGPExit',
    'YPGoalsExit',
    'DischargeDestination',
    'YPLeadProfessionalExit',
    'YPMHServicesExit',
    'YPYOTExit',
    'YPSexualExploitationExit',
]
# missingno matrix over a random sample of 250 rows of those columns.
msno.matrix(discharge[missingness_columns].sample(250))

In [None]:
# Second batch of discharge columns whose missingness is visualised.
more_missingness_columns = [
    'YPContactLearningDifficultyTreatExit',
    'YPSexuallyActiveTreatExit',
    'EngagedInUnsafeSexAtTrtmntEnd',
    'YPFreqOfUseOfDrug1AtTrtmntExit',
    'ChildProtectionIssues',
    'PlannedDateOfRecordRemoval',
    'NDTMSDischargeDate',
    'NDTMSReasonForClosure',
    'NDTMSDischargeEventID',
    'PrisonExitReason',
    'PrisonExitDestination',
    'ReferredTo',
    'PreReleaseReviewDate',
    'YPOfferedSupportFromOtherServices',
    'DateOfDeath',
    'AUDITScore',
    'ImmunisationsAdministered',
    'CounsellingDelivered',
    'ClientCommittedSuicide',
    'MentalHealthProblem',
    'CareStatusAtTreatmentExit',
    'DomesticAbuseAtTreatmentExit',
    'EducationEmploymentTrainingStatus',
    'AccommodationNeedAtTreatmentExit',
    'PregnantAtTreatmentExit',
    'ParentalStatusAtTreatmentExit',
    'ChildProtectionPlanAtTreatmentExit',
    'FeelAffectedBySubstanceMisuseAtTreatmentExit',
    'SexualRiskTakingAtTMExit',
    'GangInvolvementAtTMExit',
    'FemaleGenitalMutilationAtTMExit',
    'AsylumRefugeesAtTMExit',
    'AtRiskOfExclusion',
    'Sentenced',
]
# missingno matrix over a random sample of 3000 rows of those columns.
msno.matrix(discharge[more_missingness_columns].sample(3000))

### Outcomes

In [None]:
# Project helper call on the 'discharge_reason' column with a tall (10, 40) figure.
# NOTE(review): presumably plots group sizes per discharge reason — confirm in helper_functions.
groupby_plotsize(discharge, 'discharge_reason', figsize=(10, 40)) 

In [None]:
# Count episodes (Serial + Episode) that carry more than one DISTINCT discharge_reason.
# Summing the per-code row counts instead would also count an episode whose single
# code was merely recorded on several rows, which is not what the message reports.
codes_per_episode = discharge.groupby(['Serial', 'Episode'])['discharge_reason'].nunique()
print("There are {} episodes with more than 1 discharge code".format((codes_per_episode > 1).sum()))

In [None]:
# Project helper call on 'collapsed_dreason' with 'Serial' as the second column argument.
# NOTE(review): presumably a percentage breakdown per collapsed reason based on Serial — confirm in helper_functions.
groupby_percent(discharge, 'collapsed_dreason', 'Serial')

In [None]:
# Frequency of each collapsed discharge reason, smallest first.
discharge_gr = (
    discharge
    .groupby('collapsed_dreason')
    .size()
    .sort_values(ascending=True)
)
# One-column frame of frequencies, plus each reason's share of all discharge rows in percent.
discharge_reason = discharge_gr.to_frame(name='frequency')
discharge_reason['percent'] = discharge_gr / len(discharge) * 100
print(discharge_reason.shape)
discharge_reason

In [None]:
# NOTE(review): hard-coded sum of three percentages, presumably copied by hand from
# the collapsed-reason table above — it will go stale when the data changes; confirm
# which rows these are and consider computing the sum from discharge_reason instead.
13.251578 + 9.862891 + 5.865646

In [None]:
# Horizontal bar chart of the percentage per collapsed discharge reason.
discharge_reason.reset_index().plot(
    x='collapsed_dreason',
    y='percent',
    kind='barh',
    figsize=(5, 5),
    color='#2B8CC4',
    legend=False,
)

In [None]:
# Frequency of each (uncollapsed) discharge reason, smallest first.
discharge_gr = (
    discharge
    .groupby('discharge_reason')
    .size()
    .sort_values(ascending=True)
)
# One-column frame of frequencies, plus each reason's share of all discharge rows in percent.
discharge_reason = discharge_gr.to_frame(name='frequency')
discharge_reason['percent'] = discharge_gr / len(discharge) * 100
discharge_reason

In [None]:
discharge_reason[discharge_reason['frequency']==1].shape

In [None]:
discharge_reason[discharge_reason['frequency']==2].shape

In [None]:
discharge_reason[discharge_reason['frequency']<10].shape

In [None]:
discharge_reason.shape

In [None]:
# Horizontal bar chart of the percentage per discharge reason (legend left at its default).
discharge_reason.reset_index().plot(
    x='discharge_reason',
    y='percent',
    kind='barh',
    figsize=(10, 20),
    color='#2B8CC4',
)

In [None]:
# Project helper call on the 'outcome' column, using the helper's default figure size.
groupby_plotsize(discharge,'outcome')

In [None]:
# Project helper call on 'outcome' with 'Episode' as the second column argument.
# NOTE(review): the other groupby_percent calls pass 'Serial' or 'serial_epi' here —
# confirm 'Episode' is the intended column.
groupby_percent(discharge,'outcome', 'Episode')

In [None]:
# Project helper call on 'DischargeDestination' with 'serial_epi' as the second column argument.
groupby_percent(discharge, 'DischargeDestination', 'serial_epi')

### More counting

In [None]:
discharge['YPContactLearningDifficultyTreatExit'].value_counts(dropna=False)

In [None]:
discharge['DomesticAbuseAtTreatmentExit'].value_counts(dropna=False)

In [None]:
discharge['EducationEmploymentTrainingStatus'].value_counts(dropna=False)

In [None]:
discharge['ChildProtectionIssues'].value_counts(dropna=False)

In [None]:
discharge['YPContactLearningDifficultyTreatExit'].shape

### Read in data created in explore_joins.ipynb
It is essentially client + referral + episode + discharge + primary substance data combined into a single table.

In [None]:
# Load the gzip-compressed "rich_episode_level" table through the same project helper used for Discharge.
rich_episode_level = open_table_list_columns(DATADIR, "rich_episode_level", compression='gzip')

In [None]:
# Copy the '_x'-suffixed key columns back to the plain 'Serial' / 'Episode' names
# (the suffix presumably comes from the earlier join; the helpers below are called
# with the plain names in place).
for key in ('Serial', 'Episode'):
    rich_episode_level[key] = rich_episode_level[key + '_x']

# Same two helpers as for the discharge table: create_serial_epi, then
# derive_discharge_vars with DATADIR; each result is copied.
episode_level = derive_discharge_vars(
    create_serial_epi(rich_episode_level).copy(),
    DATADIR,
).copy()

### substance type and outcome

In [None]:
# Row counts of episode_level by substance type (rows) and collapsed discharge
# reason (columns), with margin totals.
pd.crosstab(
    episode_level['SubstanceType'],
    episode_level['collapsed_dreason'],
    margins=True,
)

### presenting problem and outcome

In [None]:
# Row counts of episode_level by presenting problem (rows) and collapsed discharge
# reason (columns), with margin totals.
pd.crosstab(
    episode_level['PresentingProblem'],
    episode_level['collapsed_dreason'],
    margins=True,
)

In [None]:
# Share of all rows in each (collapsed reason, presenting problem) cell, drawn as an
# unannotated heatmap.
x = pd.crosstab(
    episode_level['collapsed_dreason'],
    episode_level['PresentingProblem'],
    normalize='all',
)
sns.heatmap(x, annot=False)

In [None]:
# Size of every (presenting problem, collapsed reason) pair, smallest first, as horizontal bars.
pair_sizes = episode_level.groupby(['PresentingProblem', 'collapsed_dreason']).size()
pair_sizes.sort_values(ascending=True).plot(kind='barh', figsize=(5, 5))

### Young people

In [None]:
# Subset of episode_level where IsYP equals the string '1' (young people), copied so
# later changes do not touch episode_level. `yp` is reused by the following cells.
yp = episode_level[episode_level['IsYP']=='1'].copy()

# Frequency of each collapsed discharge reason within the subset, smallest first,
# and its share of the subset's rows in percent.
discharge_gr = yp.groupby('collapsed_dreason').size().sort_values(ascending=True)
discharge_reason = discharge_gr.to_frame(name='frequency')
discharge_reason['percent'] = discharge_gr / yp.shape[0]*100
# The table is printed explicitly because the plot below is the cell's last
# expression; a bare `discharge_reason` in mid-cell is never displayed, so the
# dead expression that used to follow this print has been dropped.
print(discharge_reason)

discharge_reason.reset_index().plot(x='collapsed_dreason', y='percent', kind='barh', figsize=(5, 5), color='#2B8CC4', legend=False)

In [None]:
# Number of rows per presenting problem in the `yp` subset, smallest first.
# Group sizes do not depend on which columns are selected, so the frame is grouped directly.
yp.groupby('PresentingProblem').size().sort_values(ascending=True)

In [None]:
# Share of all `yp` rows in each (collapsed reason, presenting problem) cell, drawn
# as an unannotated heatmap.
x = pd.crosstab(
    yp['collapsed_dreason'],
    yp['PresentingProblem'],
    normalize='all',
)
sns.heatmap(x, annot=False)

### Not young people

In [None]:
yp = episode_level[episode_level['IsYP']=='0'].copy()

discharge_gr = yp.groupby('collapsed_dreason').size().sort_values(ascending=True)
discharge_reason = pd.DataFrame(discharge_gr)
discharge_reason.columns = ['frequency']
discharge_reason['percent'] = discharge_gr / yp.shape[0]*100
print(discharge_reason)
discharge_reason


discharge_reason.reset_index().plot(x='collapsed_dreason', y='percent', kind='barh', figsize=(5, 5), color='#2B8CC4', legend=False)

In [None]:
x = pd.crosstab(yp.collapsed_dreason, yp.PresentingProblem, normalize='all')
sns.heatmap(x, annot=False)

### outcomes over time
#### overall

In [None]:
# Parse the discharge date and use it as the index so pd.Grouper can bucket rows by year.
episode_level['discharge_date'] = pd.to_datetime(episode_level['DischargeDate'])
episode_level.index = episode_level['discharge_date']

# Non-null discharge dates counted per (collapsed reason, year).
grouped = (
    episode_level[['discharge_date', 'collapsed_dreason']]
    .groupby(['collapsed_dreason', pd.Grouper(freq='Y')])
    .count()
)

# One line per collapsed reason, years along the x axis.
ax = grouped['discharge_date'].unstack(level='collapsed_dreason').plot()
ax.set_xlim(pd.Timestamp('2007-01-01'), pd.Timestamp('2019-01-01'))
# Legend placed outside the axes, to the right of the plot area.
ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
ax

#### not young people

In [None]:
# Subset of episode_level where IsYP equals the string '0' (NOT young people).
# Bound to `not_yp` rather than `yp`, which would mislabel this subset; the name is
# only used inside this cell.
not_yp = episode_level[episode_level['IsYP']=='0'].copy()
# Parse the discharge date and use it as the index so pd.Grouper can bucket rows by year.
not_yp['discharge_date'] = pd.to_datetime(not_yp.DischargeDate)
not_yp.index = not_yp['discharge_date']
# Non-null discharge dates counted per (collapsed reason, year).
grouped = not_yp[['discharge_date','collapsed_dreason']].groupby(['collapsed_dreason', pd.Grouper(freq='Y')]).count()
# One line per collapsed reason, years along the x axis.
ax = grouped.unstack(level='collapsed_dreason')['discharge_date'].plot()
ax.set_xlim(pd.Timestamp('2007-01-01'), pd.Timestamp('2019-01-01'))
# Legend placed outside the axes, to the right of the plot area.
ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
ax

#### young people

In [None]:
# Subset of episode_level where IsYP equals the string '1' (young people).
yp = episode_level[episode_level['IsYP']=='1'].copy()
# Parse the discharge date and use it as the index so pd.Grouper can bucket rows by year.
yp['discharge_date'] = pd.to_datetime(yp['DischargeDate'])
yp.index = yp['discharge_date']

# Non-null discharge dates counted per (collapsed reason, year).
grouped = (
    yp[['discharge_date', 'collapsed_dreason']]
    .groupby(['collapsed_dreason', pd.Grouper(freq='Y')])
    .count()
)

# One line per collapsed reason, years along the x axis.
ax = grouped['discharge_date'].unstack(level='collapsed_dreason').plot()
ax.set_xlim(pd.Timestamp('2007-01-01'), pd.Timestamp('2019-01-01'))
# Legend placed outside the axes, to the right of the plot area.
ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
ax