In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score


from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, GRU
from tensorflow.keras.optimizers import Adam
from keras.utils import np_utils

In [2]:
# Load the raw intake and outcome tables from their CSV files.
intakes = pd.read_csv(
    filepath_or_buffer='/content/drive/MyDrive/Colab Notebooks/group_project/datasets/intakes_initial.csv'
)
outcomes = pd.read_csv(
    filepath_or_buffer='/content/drive/MyDrive/Colab Notebooks/group_project/datasets/outcomes_initial.csv'
)

In [9]:
# Relative frequency of every outcome_type value in the outcomes table.
(
    outcomes['outcome_type']
    .value_counts(normalize=True)
)

Adoption           0.449255
Transfer           0.292717
Return to Owner    0.170120
Euthanasia         0.067224
Died               0.009464
Rto-Adopt          0.005887
Disposal           0.004597
Missing            0.000541
Relocate           0.000193
Name: outcome_type, dtype: float64

In [3]:
# Remove the 'Unnamed: 0' column from both tables
# (presumably a saved row index from an earlier export — TODO confirm).
# errors='ignore' keeps the cell re-runnable: once the column is gone, a
# second run is a no-op instead of raising KeyError.
intakes = intakes.drop(columns=['Unnamed: 0'], errors='ignore')
outcomes = outcomes.drop(columns=['Unnamed: 0'], errors='ignore')

In [4]:
# Show every intake row recorded for animal A754715.
intakes.loc[intakes['animal_id'].eq('A754715')]

Unnamed: 0,animal_id,name,datetime,found_location,intake_type,intake_condition,animal_type,sex_upon_intake,age_upon_intake,breed,color,is_named,year,month,day
22,A754715,Rheia,2019-07-29 17:19:00,Austin (TX),Owner Surrender,Normal,Dog,Spayed Female,2.0,Labrador Retriever Mix,Black/White,1,2019,7,Monday
3074,A754715,Rheia,2017-07-23 15:00:00,Autumn Bay in Austin (TX),Stray,Normal,Dog,Intact Female,0.417,Labrador Retriever Mix,Black/White,1,2017,7,Sunday
86875,A754715,Rheia,2018-07-09 16:26:00,Round Rock (TX),Owner Surrender,Normal,Dog,Spayed Female,1.0,Labrador Retriever Mix,Black/White,1,2018,7,Monday


In [5]:
# Show every outcome row recorded for animal A754715.
outcomes.loc[outcomes['animal_id'].eq('A754715')]

Unnamed: 0,animal_id,name,datetime,date_of_birth,outcome_type,outcome_subtype,animal_type,sex_upon_outcome,age_upon_outcome,breed,color,is_named,year,month,day
90714,A754715,Rheia,2017-07-29 13:38:00,02/09/2017,Adoption,Unknown,Dog,Spayed Female,0.417,Labrador Retriever Mix,Black/White,1,2017,7,Saturday
101817,A754715,Rheia,2018-08-06 17:16:00,02/09/2017,Adoption,Unknown,Dog,Spayed Female,1.0,Labrador Retriever Mix,Black/White,1,2018,8,Monday
120108,A754715,Rheia,2019-08-15 13:42:00,02/09/2017,Adoption,Unknown,Dog,Spayed Female,2.0,Labrador Retriever Mix,Black/White,1,2019,8,Thursday


In [10]:
# Convert the datetime columns to real timestamps.
# pd.to_datetime is called on the whole Series (vectorised) rather than through
# Series.apply, which would invoke the parser once per row in Python.
intakes['datetime'] = pd.to_datetime(intakes['datetime'])
outcomes['datetime'] = pd.to_datetime(outcomes['datetime'])

In [11]:
# Number each animal's rows by recency: with ascending=False the latest
# datetime gets rank 1, and method='dense' gives tied datetimes the same rank.
# tracking_id is '<animal_id>_<rank>'; the two tables are later joined on it.
# NOTE(review): rows of one animal that share a datetime receive the same
# tracking_id — confirm duplicates are acceptable before the merge.
# NOTE(review): pairing the k-th latest intake with the k-th latest outcome
# assumes every intake has a recorded outcome — TODO confirm.
intakes['intake_num'] = (
    intakes.groupby(['animal_id'])['datetime']
    .rank(ascending=False, method='dense')
)
intakes['tracking_id'] = (
    intakes['animal_id'] + '_' + intakes['intake_num'].astype(int).astype(str)
)

outcomes['outcome_num'] = (
    outcomes.groupby(['animal_id'])['datetime']
    .rank(ascending=False, method='dense')
)
outcomes['tracking_id'] = (
    outcomes['animal_id'] + '_' + outcomes['outcome_num'].astype(int).astype(str)
)

In [12]:
# Index both tables by tracking_id, then inner-join them on that index.
# The column check keeps the cell re-runnable: after the first run tracking_id
# lives in the index, so calling set_index('tracking_id') again would raise
# KeyError.
if 'tracking_id' in outcomes.columns:
    outcomes = outcomes.set_index('tracking_id')
if 'tracking_id' in intakes.columns:
    intakes = intakes.set_index('tracking_id')

# Still fail loudly (KeyError, as set_index would) if tracking_id was never
# built, rather than silently joining on an unrelated index.
if outcomes.index.name != 'tracking_id' or intakes.index.name != 'tracking_id':
    raise KeyError('tracking_id')

# Column names present in both tables get '_out' (outcomes) / '_in' (intakes).
full_df = pd.merge(outcomes, intakes, how='inner',
                   left_index=True, right_index=True, suffixes=('_out', '_in'))

In [13]:
# Relative frequency of every outcome_type value in the merged table.
(
    full_df['outcome_type']
    .value_counts(normalize=True)
)

Adoption           0.447472
Transfer           0.292740
Return to Owner    0.171673
Euthanasia         0.067695
Died               0.009084
Rto-Adopt          0.005973
Disposal           0.004652
Missing            0.000524
Relocate           0.000188
Name: outcome_type, dtype: float64

In [None]:
# Display the column labels of the merged frame.
full_df.columns

Index(['animal_id_out', 'name_out', 'datetime_out', 'date_of_birth',
       'outcome_type', 'outcome_subtype', 'animal_type_out',
       'sex_upon_outcome', 'age_upon_outcome', 'breed_out', 'color_out',
       'is_named_out', 'year_out', 'month_out', 'day_out', 'outcome_num',
       'animal_id_in', 'name_in', 'datetime_in', 'found_location',
       'intake_type', 'intake_condition', 'animal_type_in', 'sex_upon_intake',
       'age_upon_intake', 'breed_in', 'color_in', 'is_named_in', 'year_in',
       'month_in', 'day_in', 'intake_num'],
      dtype='object')

In [None]:
# Drop the duplicated '_out' copies of shared columns plus the id, name and
# visit-counter columns.
# errors='ignore' makes the cell re-runnable: labels that were already removed
# are skipped instead of raising KeyError.
full_df = full_df.drop(
    columns=[
        'name_out', 'name_in', 'animal_id_out', 'breed_out', 'color_out',
        'is_named_out', 'year_out', 'month_out', 'day_out', 'outcome_num',
        'animal_id_in', 'animal_type_in', 'intake_num',
    ],
    errors='ignore',
)

In [None]:
# Display the column labels that remain in full_df.
full_df.columns

Index(['datetime_out', 'date_of_birth', 'outcome_type', 'outcome_subtype',
       'animal_type_out', 'sex_upon_outcome', 'age_upon_outcome',
       'datetime_in', 'found_location', 'intake_type', 'intake_condition',
       'sex_upon_intake', 'age_upon_intake', 'breed_in', 'color_in',
       'is_named_in', 'year_in', 'month_in', 'day_in'],
      dtype='object')

In [None]:
# Shorten the column names with an explicit old -> new mapping.
# A mapping does not depend on column order or count (a positional list
# would silently mislabel columns if the order ever changed), and labels
# that are absent are ignored, so the cell can be re-run safely.
# Columns not listed here (outcome_type, outcome_subtype, intake_type,
# intake_condition, year_in, month_in, day_in) keep their names.
full_df = full_df.rename(columns={
    'datetime_out': 'date_out',
    'date_of_birth': 'dob',
    'animal_type_out': 'animal_type',
    'sex_upon_outcome': 'sex_out',
    'age_upon_outcome': 'age_out',
    'datetime_in': 'date_in',
    'found_location': 'found_loc',
    'sex_upon_intake': 'sex_in',
    'age_upon_intake': 'age_in',
    'breed_in': 'breed',
    'color_in': 'color',
    'is_named_in': 'named_in',
})

In [None]:
# Preview the first five rows of the renamed frame.
full_df.head(n=5)

Unnamed: 0_level_0,date_out,dob,outcome_type,outcome_subtype,animal_type,sex_out,age_out,date_in,found_loc,intake_type,intake_condition,sex_in,age_in,breed,color,named_in,year_in,month_in,day_in
tracking_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
A006100_1,2017-12-07 00:00:00,07/09/2007,Return to Owner,Unknown,Dog,Neutered Male,1.0,2017-12-07 14:07:00,Colony Creek And Hunters Trace in Austin (TX),Stray,Normal,Neutered Male,1.0,Spinone Italiano Mix,Yellow/White,1,2017,12,Thursday
A006100_2,2014-12-20 16:35:00,07/09/2007,Return to Owner,Unknown,Dog,Neutered Male,7.0,2014-12-19 10:21:00,8700 Research Blvd in Austin (TX),Public Assist,Normal,Neutered Male,7.0,Spinone Italiano Mix,Yellow/White,1,2014,12,Friday
A006100_3,2014-03-08 17:10:00,07/09/2007,Return to Owner,Unknown,Dog,Neutered Male,6.0,2014-03-07 14:26:00,8700 Research in Austin (TX),Public Assist,Normal,Neutered Male,6.0,Spinone Italiano Mix,Yellow/White,1,2014,3,Friday
A047759_1,2014-04-07 15:12:00,04/02/2004,Transfer,Partner,Dog,Neutered Male,1.0,2014-04-02 15:55:00,Austin (TX),Owner Surrender,Normal,Neutered Male,1.0,Dachshund,Tricolor,1,2014,4,Wednesday
A134067_1,2013-11-16 11:54:00,10/16/1997,Return to Owner,Unknown,Dog,Neutered Male,1.0,2013-11-16 09:02:00,12034 Research Blvd in Austin (TX),Public Assist,Injured,Neutered Male,1.0,Shetland Sheepdog,Brown/White,1,2013,11,Saturday


In [None]:
# Build the modelling frame by removing the date, birth-date, location and
# intake-year columns. ('found_loc' was listed twice in the original drop
# list; the duplicate is removed, the result is unchanged.)
# NOTE(review): outcome_subtype, sex_out and age_out are kept; they appear to
# come from the outcomes table — if outcome_type is the prediction target,
# confirm they are intended as features.
model_frame = full_df.drop(columns=['dob', 'found_loc', 'date_out', 'date_in', 'year_in'])

In [None]:
# Display the column labels of the modelling frame.
model_frame.columns

Index(['outcome_type', 'outcome_subtype', 'animal_type', 'sex_out', 'age_out',
       'intake_type', 'intake_condition', 'sex_in', 'age_in', 'breed', 'color',
       'named_in', 'month_in', 'day_in'],
      dtype='object')

In [None]:
# One-hot encode the listed columns; drop_first=True omits the first level of
# each so the dummies are not perfectly collinear.
# outcome_type and outcome_subtype are not in the list and stay unencoded.
model_frame = pd.get_dummies(
    data=model_frame,
    columns=[
        'animal_type', 'sex_out', 'intake_type', 'intake_condition',
        'sex_in', 'breed', 'color', 'month_in', 'day_in',
    ],
    drop_first=True,
)

In [None]:
# Persist the encoded modelling frame as CSV (the tracking_id index is written too).
model_frame.to_csv(
    path_or_buf='/content/drive/MyDrive/Colab Notebooks/group_project/datasets/model_frame.csv'
)