# Code Examples

#### Examples of using functions from `key_functions.py` 

For more information, refer to [our data product README](https://github.ubc.ca/nikihm/Capstone-project/blob/master/Docs/data_product_README.md).

In [5]:
import key_functions as kf
import pickle
import pandas as pd
from sklearn.linear_model import LogisticRegression

###### Classifier Training

In [None]:
# Damage classifier
# (Note: the annotations are located in the `/data/annotations/` folder.)

# L2-penalised logistic regression handed to the damage-classifier trainer.
dmg_clf = LogisticRegression(
    C=1,
    penalty='l2',
    solver='newton-cg',
    max_iter=1000,
    random_state=42,
)
dmg_model, dmg_vectorizer, dmg_annotations = kf.train_classifier(
    '../data/annotations/final_annotations.txt',
    clf=dmg_clf,
    context_length=6,
    fit_model=True,
)

# CN classifier

# L1-penalised logistic regression with balanced class weights, handed to
# the CN-classifier trainer. It reads the same annotation file as above.
cn_clf = LogisticRegression(
    C=1,
    penalty='l1',
    class_weight='balanced',
    solver='liblinear',
    max_iter=10000,
    random_state=42,
)
cn_model, cn_vectorizer, cn_annotations = kf.train_CN_classifier(
    '../data/annotations/final_annotations.txt',
    clf=cn_clf,
    context_length=6,
)

###### Saving & Loading Classifier

Saving and loading the classifiers, vectorizers, and annotations in `.pkl` format

In [None]:
# Save Code
# WARNING: This will overwrite the currently saved models. Uncomment lines as needed, or change the filenames.

# with open('../models/damage_model.pkl', 'wb') as file:
#     pickle.dump(dmg_model, file)
    
# with open('../models/damage_vectorizer.pkl', 'wb') as file:
#     pickle.dump(dmg_vectorizer, file)
    
# with open('../models/damage_annotations.pkl', 'wb') as file:
#     pickle.dump(dmg_annotations, file)
    
# with open('../models/cn_model.pkl', 'wb') as file:
#     pickle.dump(cn_model, file)

# with open('../models/cn_vectorizer.pkl', 'wb') as file:
#     pickle.dump(cn_vectorizer, file)

# with open('../models/cn_annotations.pkl', 'wb') as file:
#     pickle.dump(cn_annotations, file)

In [None]:
# Load Code
# NOTE: unpickling can execute arbitrary code, so only load `.pkl` files
# that you created yourself or that come from a source you trust.


def _load_pickle(path):
    """Return the object stored in the pickle file at `path`."""
    with open(path, 'rb') as file:
        return pickle.load(file)


# Damage classifier artefacts
dmg_model = _load_pickle('../models/damage_model.pkl')
dmg_vectorizer = _load_pickle('../models/damage_vectorizer.pkl')
dmg_annotations = _load_pickle('../models/damage_annotations.pkl')

# CN classifier artefacts
cn_model = _load_pickle('../models/cn_model.pkl')
cn_vectorizer = _load_pickle('../models/cn_vectorizer.pkl')
cn_annotations = _load_pickle('../models/cn_annotations.pkl')

###### Using the classifier

In [None]:
# Input files are addressed as <path_to_data><file_prefix><number><file_suffix>,
# e.g. '../data/Lexis Cases txt/P1.txt'.
path_to_data = '../data/Lexis Cases txt/'
file_prefix = 'P'
file_suffix = '.txt'
file_identifiers = range(1, 86)  # File numbers 1 through 85

clf_results = []
for file_number in file_identifiers:
    # Build the path once and reuse it for both the progress message and the read.
    case_path = path_to_data + file_prefix + str(file_number) + file_suffix

    # end='\r' moves the cursor back to the start of the line, so each
    # progress message is printed over the previous one.
    print('## Processing ' + case_path + ' ##', end='\r')

    with open(case_path) as file:
        document_data = file.read()

    # Pass the models to kf.parse_BCJ: rule-based parsing is used for columns
    # such as the judge name, and the classifiers are used for damages/CN.
    # To use rule-based parsing for damages/CN as well, do not pass the
    # model/vectorizer arguments.
    #
    # NOTE(review): the CN classifier in the training cell is fit with
    # context_length = 6, while cn_context_length = 2 is passed here --
    # confirm that this difference is intentional.
    clf_results.extend(
        kf.parse_BCJ(
            document_data,
            damage_model=dmg_model,
            damage_vectorizer=dmg_vectorizer,
            annotated_damages=dmg_annotations,
            cn_model=cn_model,
            cn_vectorizer=cn_vectorizer,
            annotated_cn=cn_annotations,
            min_predict_proba=0.5,
            dmg_context_length=6,
            cn_context_length=2,
        )
    )
    

###### Evaluating the model

In [None]:
# Read the gold annotations and discard rows in which every column is empty.
gold_df = pd.read_csv('../data/annotations/gold_annotations.csv').dropna(how='all')

# Turn the parsed cases into a DataFrame and hand it to kf.evaluate together
# with the gold annotations.
dev_df = kf.convert_cases_to_DF(clf_results)
kf.evaluate(dev_df, gold_df)

###### Saving the results in CSV format

In [None]:
# Write the results DataFrame to disk; index=False keeps the row index out
# of the CSV file.
results_csv_path = '../data/my_results.csv'
dev_df.to_csv(results_csv_path, index=False)