<a href="https://colab.research.google.com/github/emrapport/w266-final-project/blob/master/model_results.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Analysis of model results
Edit `metric_dict` to change weights for various metrics.



In [0]:
# Input metric weights from 0 to 1.
metric_dict = {'delta_gender':.9,
               'delta_party':.6,
               'final_gender':.7,
               'final_party':.7,
               'best_gender':.8,
               'best_party':.7,
               'percent_W':.9,
               'percent_D':.8
               }

In [1]:
import pandas as pd
import numpy as np
import copy
import time
import pickle
!pip install gcsfs

pd.set_option('max_colwidth', 100)

Collecting gcsfs
  Downloading https://files.pythonhosted.org/packages/65/e2/05f903ce8f77804127195cfcc1ca8b500a3157a1572dcc4b82bf5af01564/gcsfs-0.4.0-py2.py3-none-any.whl
Installing collected packages: gcsfs
Successfully installed gcsfs-0.4.0


In [2]:
# this cell is only necessary if running in colab
project_id = 'w266-251323'
import uuid
bucket_name = 'fb-congressional-data/'
from google.colab import auth
auth.authenticate_user()
!gcloud config set project {project_id}

The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
  * https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.

Updated property [core/project].


In [0]:
from google.colab import files
import io 

Don't need to do the next few cells unless uploading new results from local machine.

In [0]:
# Get manually inputted model results. [11/23/19 3pm]

uploaded = files.upload()
model_results = pd.read_csv(io.BytesIO(uploaded['model_results.csv']))

Saving model_results.csv to model_results (6).csv


In [0]:
# Download the new dataframe to local machine.

model_results.to_csv('model_results.csv', header=True, index=False)
files.download('model_results.csv')

In [0]:
  !gsutil cp model_results\ \(6\).csv gs://fb-congressional-data/models/model_results.csv

Copying file://model_results (6).csv [Content-Type=text/csv]...
/ [1 files][ 21.9 KiB/ 21.9 KiB]                                                
Operation completed over 1 objects/21.9 KiB.                                     


Start here to get results from the bucket.

In [0]:
model_results = pd.read_csv("gs://fb-congressional-data/models/model_results.csv")

In [5]:
model_results.head()

Unnamed: 0,beg_end_resp,drop_short,train_M,train_W,train_R,train_D,dev_M,dev_W,dev_R,dev_D,max_resps,epochs,batch_size,max_seq,freq_wds,n_layers,layer_size,filter_size,dropout,dense_size,embedding_dim,model_idx,init_loss,init_acc,init_val_acc,final_loss,final_acc,final_val_acc,best_val_acc,best_epoch,delta_val_acc,delta_best_val_acc,percent_W,percent_D,saved_model_name
0,Y,2,2900753,1061531,0.0,0.0,1948371,344536,0,0,50,10,1000,10,5000,2,32,2,0.2,10,50,1,0.5206,0.7662,0.849,0.4936,0.7781,0.8489,0.849,1,-0.0001,0.0,1.559,,test3_model_0_gender_1574450353.2073507.h5
1,Y,2,2900753,1061531,0.0,0.0,1948371,344536,0,0,50,10,1000,20,5000,2,32,2,0.2,10,50,2,0.518,0.7663,0.8501,0.4891,0.7789,0.8472,0.8486,5,-0.0029,-0.0015,1.861,,test3_model_2_gender_1574450353.2073507.h5
2,Y,2,2900753,1061531,0.0,0.0,1948371,344536,0,0,50,10,1000,30,5000,2,32,2,0.2,10,50,3,0.5106,0.7699,0.8511,0.4844,0.7813,0.8496,0.8517,3,-0.0015,0.0006,1.994,,LPFriday11_22_model_1_gender_1574459044.4948301.h5
3,Y,2,2900753,1061531,0.0,0.0,1948371,344536,0,0,50,10,1000,50,5000,2,32,2,0.2,10,50,4,0.5143,0.7677,0.8452,0.483,0.782,0.8466,0.8477,4,0.0014,0.0025,2.384,,LPFriday11_22_model_3_gender_1574459044.4948301.h5
4,Y,2,2900753,1061531,0.0,0.0,1948371,344536,0,0,50,10,1000,100,5000,2,32,2,0.2,10,50,5,0.5068,0.7718,0.8467,0.4813,0.7829,0.8473,0.849,3,0.0006,0.0023,2.204,,LPFriday11_22_model_5_gender_1574459044.4948301.h5


In [0]:
def get_model_metric(results_df, metric_dict,n):
  '''Take a dataframe of model results, metrics and number of top models
  and return that dataframe with a new model_metric column. The dataframe must have "saved_model_name",
  "delta_val_acc","final_val_acc","best_val_acc", "percent_W", and "percent_D"'''

  gender_results = results_df[results_df['saved_model_name'].str.contains("gender")]
  party_results = results_df[results_df['saved_model_name'].str.contains("party")]

  gender_best_delta = gender_results.sort_values('delta_val_acc', ascending=False).head(n)
  gender_best_delta['model_metric'] = metric_dict['delta_gender']
  gender_best_final = gender_results.sort_values("final_val_acc", ascending=False).head(n)
  gender_best_final['model_metric'] = metric_dict['final_gender']
  gender_best_acc = gender_results.sort_values("best_val_acc", ascending=False).head(n)
  gender_best_acc['model_metric'] = metric_dict['best_gender']
  gender_best_W = gender_results.sort_values("percent_W", ascending=False).head(n)
  gender_best_W['model_metric'] = metric_dict['percent_W']

  party_best_delta = party_results.sort_values('delta_val_acc', ascending=False).head(n)
  party_best_delta['model_metric'] = metric_dict['delta_party']
  party_best_final = party_results.sort_values("final_val_acc", ascending=False).head(n)
  party_best_final['model_metric'] = metric_dict['final_party']
  party_best_acc = party_results.sort_values("best_val_acc", ascending=False).head(n)
  party_best_acc['model_metric'] = metric_dict['best_party']
  party_best_D = party_results.sort_values("percent_D", ascending=False).head(n)
  party_best_D['model_metric'] = metric_dict['percent_D']

  metrics_df = gender_best_delta.append([gender_best_final,gender_best_acc,gender_best_W,party_best_delta,
                                              party_best_delta,party_best_final,party_best_acc,party_best_D])
  best_indices = metrics_df.groupby('model_idx')['model_metric'].sum().reset_index()
  best_results_df = results_df.merge(best_indices,on='model_idx')
  return best_results_df

In [38]:
best_models = get_model_metric(model_results,metric_dict,10).sort_values('model_metric', ascending=False)
best_models.head(20)

Unnamed: 0,beg_end_resp,drop_short,train_M,train_W,train_R,train_D,dev_M,dev_W,dev_R,dev_D,max_resps,epochs,batch_size,max_seq,freq_wds,n_layers,layer_size,filter_size,dropout,dense_size,embedding_dim,model_idx,init_loss,init_acc,init_val_acc,final_loss,final_acc,final_val_acc,best_val_acc,best_epoch,delta_val_acc,delta_best_val_acc,percent_W,percent_D,saved_model_name,model_metric
19,Y,2,0,0,2459013.0,1486355.0,0,0,1698252,594655,50,10,1000,50,5000,2,128,2,0.5,10,50,17,0.5901,0.675,0.7351,0.5482,0.7031,0.7504,0.7505,7,0.0153,0.0154,,7.925,LPSaturday11_23_model_2_party_1574532795.1390407.h5,3.8
18,Y,2,2900753,1061531,0.0,0.0,1948371,344536,0,0,50,10,1000,50,5000,2,128,2,0.5,10,50,17,0.5067,0.771,0.8483,0.4793,0.7833,0.851,0.8512,9,0.0027,0.0029,1.172,,LPSaturday11_23_model_2_gender_1574532795.1390407.h5,3.8
21,Y,2,0,0,2459013.0,1486355.0,0,0,1698252,594655,50,10,1000,50,5000,2,128,2,0.8,10,50,18,0.6449,0.6367,0.7388,0.5807,0.6817,0.741,0.7411,5,0.0022,0.0023,,31.855,truncated to 6 epochs; met output size limit; party model,3.2
20,Y,2,2900753,1061531,0.0,0.0,1948371,344536,0,0,50,10,1000,50,5000,2,128,2,0.8,10,50,18,0.5344,0.7583,0.8448,0.4968,0.7764,0.8516,0.8516,10,0.0068,0.0068,0.717,,LPSaturday11_23_model_3_gender_1574532795.1390407.h5,3.2
14,Y,2,2900753,1061531,0.0,0.0,1948371,344536,0,0,50,10,1000,50,5000,2,128,2,0.0,10,50,15,0.4843,0.7809,0.8502,0.4597,0.7899,0.8408,0.8502,1,-0.0094,0.0,3.994,,LPSaturday11_23_model_0_gender_1574532795.1390407.h5,2.9
15,Y,2,0,0,2459013.0,1486355.0,0,0,1698252,594655,50,10,1000,50,5000,2,128,2,0.0,10,50,15,0.5535,0.7008,0.7103,0.5125,0.7269,0.7416,0.7416,10,0.0313,0.0313,,15.107,LPSaturday11_23_model_0_party_1574532795.1390407.h5,2.9
74,Y,1,2751507,1004243,0.0,0.0,1821965,328827,0,0,50,10,1000,50,20,1,128,2,0.5,10,50,49,0.5189,0.7658,0.848,0.4828,0.7823,0.8488,0.8498,2,0.0008,0.0018,1.366,,emSaturdayRound1_model_4_gender_1574526502.7369754.h5,2.6
25,Y,2,0,0,1813217.0,1108843.0,0,0,1698252,594655,25,10,1000,50,5000,2,32,2,0.2,10,50,20,0.5846,0.678,0.7192,0.5363,0.7096,0.7531,0.7531,10,0.0339,0.0339,,12.976,LPFriday11_22_model_1_party_1574533565.182682.h5,2.6
24,Y,2,2168175,765916,,,1948371,344536,0,0,25,10,1000,50,5000,2,32,2,0.2,10,50,20,0.4952,0.7785,0.8487,0.4667,0.7902,0.8475,0.8494,9,-0.0012,0.0007,2.799,,LPFriday11_22_model_1_gender_1574533565.182682.h5,2.6
75,Y,1,0,0,2336689.0,1402732.0,0,0,1593280,557512,50,10,1000,50,20,1,128,2,0.5,10,50,49,0.5997,0.6701,0.7215,0.5539,0.7008,0.7479,0.7515,9,0.0264,0.03,,13.997,emSaturdayRound1_model_4_party_1574526502.7369754.h5,2.6
