# Referral table

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import pprint
import missingno as msno
import seaborn as sns
from helper_functions import open_table_list_columns, groupby_percent, groupby_plotsize, create_serial_epi

import os
# Root directory of the data extracts, read from the DATADIR environment
# variable. The value is None when the variable is not set.
DATADIR = os.environ.get('DATADIR')

## Load data and derive variables

In [None]:
# Load the 'Referral' table from DATADIR through the project helper.
# NOTE(review): the helper's name suggests it also lists the table's columns —
# confirm in helper_functions.
referral = open_table_list_columns(DATADIR, 'Referral')

In [None]:
# Parse the raw ReferralDate column into pandas datetimes, kept in a new
# 'referral_date' column so the original values stay untouched.
referral['referral_date'] = pd.to_datetime(referral['ReferralDate'])

In [None]:
# Index the table by referral date. The yearly groupings further down use
# pd.Grouper(freq='Y') without a key, which bins on this index.
referral.index = referral['referral_date']

In [None]:
# Run the project helper over the referral table.
# NOTE(review): presumably this adds the 'serial_epi' episode identifier that
# is counted further down — confirm in helper_functions.
referral = create_serial_epi(referral)

In [None]:
# Read the referral-source lookup file (pipe-delimited, ISO-8859-1 encoded,
# all columns loaded with dtype=object).
codes_path = os.path.join(DATADIR, 'Referral_codes.csv')
codes_table = pd.read_csv(
    codes_path,
    dtype=object,
    delimiter='|',
    encoding="ISO-8859-1",
)

# Code -> Text dictionary, used to attach a readable label to each referral.
referral_codes = {
    code: text
    for code, text in zip(codes_table['Code'], codes_table['Text'])
}
referral['referral_source_label'] = referral['ReferralSource'].map(referral_codes)

# Number of distinct codes in the lookup (shown as the cell output).
len(referral_codes)

### Count some things

In [None]:
# Overall size of the referral table.
n_rows, n_columns = referral.shape
print(f"there are {n_rows} rows and {n_columns} columns in the table")

In [None]:
# Distinct values of the Serial column (one per client).
n_clients = referral['Serial'].nunique()
print(f"there are {n_clients} unique clients in referral data")

In [None]:
# Distinct values of the serial_epi column (one per episode).
n_episodes = referral['serial_epi'].nunique()
print(f"there are {n_episodes} unique episodes in referral data")

So there is a referral for every episode. Nice.

In [None]:
# Distinct values of the Organisation column.
n_organisations = referral['Organisation'].nunique()
print(f"there are {n_organisations} unique organisations in referral data")

In [None]:
# Count rows with no referred modality. Series.sum() on the boolean mask is
# vectorised, whereas the builtin sum() iterates the Series element by element.
n_missing_modalities = referral['ReferredModalities'].isna().sum()
print("there are {} missing referral modalities in referral data".format(n_missing_modalities))

### How many referrals/episodes over time

In [None]:
# Number of referral rows per calendar day, drawn as a line plot.
referral_day = referral['referral_date'].dt.date
referrals_per_day = referral.groupby(referral_day).size()
referrals_per_day.plot()

In [None]:
# Same daily-count plot, with the x-axis limited to 2007-01-01 .. 2019-01-01.
window_start = pd.Timestamp('2007-01-01')
window_end = pd.Timestamp('2019-01-01')

daily_counts = referral.groupby(referral['referral_date'].dt.date).size()
ax = daily_counts.plot()
ax.set_xlim(window_start, window_end)
ax

In [None]:
# Daily referral counts, then only the days with more than 200 referrals.
day_of_referral = referral['referral_date'].dt.date
referral_freq = referral.groupby(day_of_referral).size()
referral_freq[referral_freq.gt(200)]

### Presenting problem

In [None]:
# Summarise the referral table by 'PresentingProblem' with the project helper.
# NOTE(review): presumably a plot of group sizes, judging by the helper's
# name — confirm in helper_functions.
groupby_plotsize(referral,'PresentingProblem')

In [None]:
# Yearly counts per presenting problem. `grouped` has to be built in this
# cell: nothing above defines it, and the frames of the same name built in
# later cells are not indexed by 'PresentingProblem', so the unstack below
# would fail on them. pd.Grouper(freq='Y') bins on the table's datetime index.
grouped = referral.groupby(['PresentingProblem', pd.Grouper(freq='Y')]).count()

# One line per presenting problem; count() of 'Episode' is the per-group
# number of rows with a non-null Episode.
ax = grouped.unstack(level='PresentingProblem')['Episode'].plot()
ax.set_xlim(pd.Timestamp('2007-01-01'), pd.Timestamp('2019-01-01'))
# Anchor the legend just outside the right-hand edge of the axes.
ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
ax

### Treatment referred options

In [None]:
# Summarise referrals by 'TreatmentReferredOption1' on a tall (10 x 30) figure.
# NOTE(review): the tall figure is presumably needed because this column has
# many categories — confirm.
groupby_plotsize(referral, 'TreatmentReferredOption1', figsize=(10, 30))

In [None]:
# Summarise referrals by 'TreatmentReferredOption2' on a tall (10 x 30) figure.
# NOTE(review): the tall figure is presumably needed because this column has
# many categories — confirm.
groupby_plotsize(referral, 'TreatmentReferredOption2', figsize=(10, 30))

In [None]:
# Summarise referrals by 'TreatmentReferredOption3' on a tall (10 x 30) figure.
# NOTE(review): the tall figure is presumably needed because this column has
# many categories — confirm.
groupby_plotsize(referral, 'TreatmentReferredOption3', figsize=(10, 30))

### IsPrimary (I don't know what this variable means...)

In [None]:
# Row count for each IsPrimary value, smallest first, as a horizontal bar chart.
is_primary_counts = referral.groupby('IsPrimary').size()
is_primary_counts = is_primary_counts.sort_values(ascending=True)
is_primary_counts.plot(kind='barh', figsize=(10, 2), color='#2B8CC4')

### Referral source

In [None]:
# Summarise referrals by the readable referral-source label on a tall
# (10 x 30) figure, using the project helper.
# NOTE(review): presumably a plot of group sizes — confirm in helper_functions.
groupby_plotsize(referral, 'referral_source_label', figsize=(10, 30))

In [None]:
# Break referrals down by referral-source label using the project helper.
# NOTE(review): presumably reports each label's percentage share based on the
# 'Episode' column — confirm in helper_functions.
groupby_percent(referral, 'referral_source_label','Episode')

In [None]:
# Collapse the detailed referral-source labels into a handful of broad
# categories. Labels that are missing, or not listed here, become 'other'.
collapsed_labels = {
    'Self': 'self',
    'GP': 'GP',
    'Arrest Referral': 'arrest referral',
    'Drug Service Statutory': 'statutory drug service',
    'CARAT / Prison': 'CARAT/prison',
    'Drug service non- statutory': 'non-statutory drug service',
    'Hospital': 'hospital',
    'Probation': 'probation',
    'Community Alcohol Team': 'community alcohol team',
}
referral['collapsed_source'] = (
    referral['referral_source_label'].map(collapsed_labels).fillna('other')
)

In [None]:
# Summarise referrals by the collapsed referral-source category.
# NOTE(review): presumably a plot of group sizes — confirm in helper_functions.
groupby_plotsize(referral,'collapsed_source')

In [None]:
# Yearly counts per collapsed referral source. pd.Grouper(freq='Y') has no key,
# so it bins on the table's datetime index.
source_and_year = ['collapsed_source', pd.Grouper(freq='Y')]
grouped = referral.groupby(source_and_year).count()

# Pivot the sources into columns and plot the 'Episode' counts, one line each.
yearly_by_source = grouped.unstack(level='collapsed_source')
ax = yearly_by_source['Episode'].plot()

first_day = pd.Timestamp('2007-01-01')
last_day = pd.Timestamp('2019-01-01')
ax.set_xlim(first_day, last_day)
# Anchor the legend just outside the right-hand edge of the axes.
ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
ax

In [None]:
# Break referrals down by collapsed referral-source category.
# NOTE(review): presumably reports each category's percentage share based on
# the 'Episode' column — confirm in helper_functions.
groupby_percent(referral,'collapsed_source', 'Episode')

In [None]:
# Percentage share of referrals for each (uncollapsed) referral source.
# count() tallies non-null values in every column; the 'Episode' column is
# the one used as the per-source count.
sources = referral.groupby('referral_source_label').count().reset_index()
sources['percent'] = 100 * sources['Episode'] / sources['Episode'].sum()
sources = sources.sort_values(['percent'])
print(sources.shape)
# Print the table explicitly: a bare expression in the middle of a cell is
# evaluated and then discarded, so it was never displayed.
print(sources[['referral_source_label', 'percent']])
sources.plot(x='referral_source_label', y='percent', kind='barh', figsize=(10, 30), color='#2B8CC4')

### Referral route

In [None]:
# Summarise referrals by 'ReferralRoute' with the project helper.
# NOTE(review): presumably a plot of group sizes — confirm in helper_functions.
groupby_plotsize(referral,'ReferralRoute')

In [None]:
# Break referrals down by 'ReferralRoute' using the project helper.
# NOTE(review): presumably reports each route's percentage share based on the
# 'Episode' column — confirm in helper_functions.
groupby_percent(referral,'ReferralRoute', 'Episode')

### Referral route by source

In [None]:
# Cross-tabulate referral route against collapsed source. normalize='all'
# turns each cell into its fraction of all referrals. Shown as a heatmap
# without per-cell annotations.
route_by_source = pd.crosstab(
    referral['ReferralRoute'],
    referral['collapsed_source'],
    normalize='all',
)
sns.heatmap(route_by_source, annot=False)

In [None]:
# Yearly counts per referral route. pd.Grouper(freq='Y') has no key, so it
# bins on the table's datetime index.
route_and_year = ['ReferralRoute', pd.Grouper(freq='Y')]
grouped = referral.groupby(route_and_year).count()

# Pivot the routes into columns and plot the non-null count of
# 'collapsed_source', one line per route.
yearly_by_route = grouped.unstack(level='ReferralRoute')
ax = yearly_by_route['collapsed_source'].plot()

first_day = pd.Timestamp('2007-01-01')
last_day = pd.Timestamp('2019-01-01')
ax.set_xlim(first_day, last_day)
# Anchor the legend just outside the right-hand edge of the axes.
ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
ax

Columns I still haven't looked at:

 - 'ReferrerID',
 - 'Organisation',
 - 'ReferralMadeDate',
 - 'ReferredModalityDate',
 - 'AssessmentDate',
 - 'ReferredModalities',
 - 'AssessmentAgency',
 - 'TreatmentReferredOptionOther',