## Recommendation Engine - Duncan
Last edit: July 5, 2021


In [3]:
import numpy as np
import pandas as pd
import operator
from flask import Flask, request, jsonify, render_template, url_for
import pickle

## SQL Connector 

This part does not work ATM, but ideally data will be pulled from the SQL DB and fed directly into the engine.

In [4]:
'''
# NOTE: disabled snippet, kept for reference only (the connector does not work yet).
# SECURITY: the connection string previously embedded the database user and
# password in plain text. Credentials must never be committed: rotate the
# exposed password and load the secrets from the environment instead.
import os
import sqlalchemy
from sqlalchemy import create_engine
import mysql.connector



# Define the MySQL engine using MySQL Connector/Python
engine = sqlalchemy.create_engine(
    'mysql+mysqlconnector://{user}:{password}@{host}:3306/myplaced_organizations'.format(
        user=os.environ['ORG_DB_USER'],
        password=os.environ['ORG_DB_PASSWORD'],
        host=os.environ.get('ORG_DB_HOST', 'myplacedumarche.com'),
    )
).connect()

print(engine)
'''

"\nimport pandas as pd\nimport sqlalchemy\nfrom sqlalchemy import create_engine\nimport mysql.connector\n\n\n\n# Define the MySQL engine using MySQL Connector/Python\nengine = sqlalchemy.create_engine(\n    'mysql+mysqlconnector://<DB_USER>:<DB_PASSWORD>@myplacedumarche.com:3306/myplaced_organizations'\n).connect()\n\nprint(engine)\n"

## Define Inputs

Here, we define our inputs that come from the DB. Note that we will use the following data:
- organizations_df: entire repository of organizations
- locations_df: list of all available locations
- roles_df: list of all available volunteer roles 

We will also define some custom lists and matrices that will be used for calculations at a later point.

In [21]:
# Stand-in for the database: the three tables are read from CSV exports that
# are expected in the current working directory.
organizations_df = pd.read_csv('Organizations_DB.csv')
locations_df = pd.read_csv('Locations_DB.csv')
roles_df = pd.read_csv('Available_Roles_DB.csv')

In [22]:
# Replace non-breaking spaces (U+00A0) with regular spaces in every cell, in place,
# so that text compared against the cleaned location names matches.
organizations_df.replace('\xa0', ' ', regex=True, inplace=True)
organizations_df.head(3)

Unnamed: 0,OrgID,Name,Address Line 1,Address Line 2,Zip Code,Location,Coordinates,Org size,Service size,Target demographic,Services Provided,Available Roles,Brief description,Link,General Contact,Internal Contact,Consent
0,1,Le Chic Resto Pop,,,,Mercier—Hochelaga-Maisonneuve,,small,small,,,"Cleaning, Cooking, Food preparation","The organization offers, through its socially ...",https://www.myplacedumarche.com/le-chic-resto-...,,,
1,2,Maison de Quartier Villeray,,,,Villeray—Saint-Michel—Parc-Extension,,small,small,,,"Gardening/Harvesting, Food preparation, Cooking","Since 1989, It offers activities stimulating m...",https://www.myplacedumarche.com/la-maison-de-q...,,,
2,3,Resto Plateau,,,,Le Plateau-Mont-Royal,,medium,medium,,,"Food preparation, Cooking",Non-profit organization with the goal of incre...,https://www.myplacedumarche.com/resto-plateau/,,,


In [23]:
roles_df.head(3)

Unnamed: 0,Available Roles
0,Administration
1,Activities supervisor
2,Cleaning


In [24]:
locations_df.head(3)

Unnamed: 0,Organization Location
0,L’Île-Bizard—Sainte-Geneviève
1,Pierrefonds-Roxboro
2,Saint-Laurent


In [25]:
# NOTE: this rebinds locations_df from a DataFrame to a plain list, so the cell
# cannot be re-run without reloading the CSV first.
locations_df = locations_df['Organization Location'].tolist()
# same non-breaking-space clean-up as applied to organizations_df
all_locations = [i.replace('\xa0',' ') for i in locations_df]

# every volunteer role a user can pick
all_roles = list(roles_df['Available Roles'])

print(all_locations)
print('')
print(all_roles)

['L’Île-Bizard—Sainte-Geneviève', 'Pierrefonds-Roxboro', 'Saint-Laurent', 'Ahuntsic-Cartierville', 'Montréal-Nord', 'Rivière-des-Prairies—Pointe-aux-Trembles', 'Anjou', 'Saint-Léonard', 'Villeray—Saint-Michel—Parc-Extension', 'Rosemont—La Petite-Patrie', 'Mercier—Hochelaga-Maisonneuve', 'Le Plateau-Mont-Royal', 'Outremont', 'Ville-Marie', 'Côte-des-Neiges—Notre-Dame-de-Grâce', 'Le Sud-Ouest', 'Verdun', 'LaSalle', 'Boucherville']

['Administration', 'Activities supervisor', 'Cleaning', 'Cooking', 'Delivery', 'Education', 'Food preparation', 'Food serving', 'Gardening/Harvesting', 'Kitchen & Cafeteria', 'Packing & Sorting', 'Warehouse', 'Daycare', 'Professional/Skills development', 'Other', 'Home Visits', 'Cashier']


In [26]:
# lists of the valid input tokens for the organization-size and service-size preferences
all_org_sizes = ['smallOrg', 'mediumOrg' ,'largeOrg']
all_service_sizes = ['smallService', 'mediumService' ,'largeService']

# matrix that maps two locations to their physical distance
# Rows and columns follow the order of all_locations; only the entries with
# row index < column index are filled in (upper triangle), the rest are 0.
# NOTE(review): there are 18 rows but 19 columns (one per entry of all_locations).
# Rows 0-10 start with row+1 zeros, but the 12th row starts with 13 zeros, so the
# row for location index 11 appears to be missing and every later row is read one
# position early -- TODO confirm against the source data and restore the row.
distMatrix = [[0, 1, 2, 3, 4, 5, 5, 4, 3, 4, 5, 4, 4, 4, 3, 4, 4, 3, 2],
              [0, 0, 1, 1, 2, 4, 3, 3, 2, 2, 4, 3, 3, 3, 2, 3, 3, 3, 2],
              [0, 0, 0, 1, 2, 3, 3, 2, 1, 1, 2, 1, 1, 2, 1, 2, 2, 2, 1],
              [0, 0, 0, 0, 1, 2, 2, 1, 1, 2, 3, 2, 2, 3, 2, 3, 3, 3, 2],
              [0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 4, 4, 4, 4],
              [0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 5, 5, 5],
              [0, 0, 0, 0, 0, 0, 0, 1, 2, 2, 1, 2, 3, 3, 4, 4, 4, 5, 5],
              [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5],
              [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 1, 1, 2, 1, 3, 3, 4, 3],
              [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 3, 3, 4, 4],
              [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 1, 2, 2, 3, 4, 4],
              [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 3, 3],
              [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 3],
              [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 1, 1],
              [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2],
              [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2],
              [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
              [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]

## Process Inputs

Here, we take inputs from the web-page and store them as variables. For now, we define the inputs as **inp = ['Verdun', 'Saint-Laurent', 'Ville-Marie','smallOrg', 'mediumOrg', 'smallService', 'mediumService', 'Cooking' ,'Gardening/Harvesting' ,'Cleaning']**
for demonstration purposes.

Note that currently, the user can specify 4 types of preferences, as follows:
- Location of the organization
- Volunteer role 
- Organization size
- Service size

For each preference, there are 3 possible types of inputs. (1) If no input is received from the web-page (the user does not select any options), the corresponding input variable will be defined as an empty array. (2) The user can explicitly select "no preference" for that category (the values `NoLocationPref`, `NoRolePref`, `NoOrgPref` and `NoServicePref`).

Otherwise, the user can select 1+ options for each preference type (3) -- for instance, the user can specify that they are open to volunteering in both Verdun and Saint-Laurent. 

In [27]:
#blank if no input is received
location_pref = []
role_pref = []
org_size_pref = []
service_size_pref = []


#placeholder for now, later this will come from the user inputs
inp = ['Verdun', 'Saint-Laurent', 'Ville-Marie','smallOrg', 'mediumOrg', 'smallService', 'mediumService', 'Cooking' ,'Gardening/Harvesting' ,'Cleaning']


# verify and process inputs: route every token to the preference list of the
# category it belongs to; tokens that match no category are dropped.
# (The loop variable is deliberately not called `input`, which would shadow the
# builtin for the rest of the notebook session.)
for choice in map(str, inp):
    if choice in all_locations or choice == 'NoLocationPref': #location
        location_pref.append(choice)
    elif choice in all_roles or choice == 'NoRolePref': #role
        role_pref.append(choice)
    elif choice in all_org_sizes or choice == 'NoOrgPref': #org size
        org_size_pref.append(choice)
    elif choice in all_service_sizes or choice == 'NoServicePref': #service size
        service_size_pref.append(choice)


print(location_pref)
print(org_size_pref)
print(service_size_pref)
print(role_pref)

['Verdun', 'Saint-Laurent', 'Ville-Marie']
['smallOrg', 'mediumOrg']
['smallService', 'mediumService']
['Cooking', 'Gardening/Harvesting', 'Cleaning']


## Distance Penalty Functions

Next, we define 4 types of penalty functions that will output a score for each organization based on the closeness of the user's preferences to the characteristics of a particular organization. The 4 types of penalties will then be aggregated for each organization.

The penalties are:
- Location/Physical distance penalty
- Role penalty
- Organization size penalty
- Service size penalty

Each penalty ranges from 0-5, and so the minimum penalty for each organization is 0 while the max penalty is 5*4 = 20. 

#### Location/Physical Distance Penalty 

Penalty ranges from 0-5, depending on the closest distance from the user's preference(s) to an organization. For instance, if the user specifies 3 boroughs, only the distance from the user's chosen borough which is closest to the organization's borough will be calculated.

In [29]:
def getPhysicalDistance(location_pref, org_location):
    """Return the location penalty between the user's boroughs and an organization.

    Args:
        location_pref: list of borough names chosen by the user; an empty list
            or a list containing 'NoLocationPref' means "no preference".
        org_location: borough name of the organization.

    Returns:
        0 when there is no preference, otherwise the smallest distMatrix entry
        between any chosen borough and the organization's borough.

    Raises:
        ValueError: if a borough name is not present in all_locations.
    """
    # The sentinel arrives as an element of the list, so test membership;
    # comparing the whole list to the string is never true.
    if not location_pref or 'NoLocationPref' in location_pref:
        return 0

    org_idx = all_locations.index(org_location)
    penalties = []
    for chosen in location_pref:
        pref_idx = all_locations.index(chosen)
        if pref_idx == org_idx:
            # Same borough: distance is 0. Skipping the lookup also avoids
            # indexing a diagonal row that distMatrix does not contain.
            penalties.append(0)
            continue
        # distMatrix is only filled where row < column, so order the pair.
        row, col = sorted((pref_idx, org_idx))
        penalties.append(distMatrix[row][col])
    return min(penalties)

Example: here, we are calculating the distance from each of the user's 3 location preferences to Villeray. Note that since the user selected multiple options, only the closest distance is taken. 

In [30]:
location_pref

['Verdun', 'Saint-Laurent', 'Ville-Marie']

In [31]:
organizations_df['Location'][1]

'Villeray—Saint-Michel—Parc-Extension'

In [None]:
getPhysicalDistance(location_pref, organizations_df['Location'][1])

#### Role Preference Penalty

Penalty ranges from 0-5, depending on the proportion of the user's selected preference(s) that match the roles that the organization offers.

In [34]:
#RULE: the penalty is 0-5, depending on the proportion of the user's preferred role(s) that the organization does NOT offer

def getRoleDistance(role_pref, org_roles):
    """Return the role penalty (0-5) for one organization.

    Args:
        role_pref: list of role names chosen by the user; an empty list or a
            list containing 'NoRolePref' means "no preference".
        org_roles: the organization's roles, normally the comma-separated text
            of the 'Available Roles' column (a collection of names also works).

    Returns:
        0.0 when there is no preference, otherwise 5 times the fraction of the
        preferred roles that the organization does not offer
        (e.g. 2 of 3 roles matched -> 1.67).
    """
    # The sentinel arrives as an element of the list, so test membership;
    # comparing the whole list to the string is never true.
    if not role_pref or 'NoRolePref' in role_pref:
        return 0.0

    # pandas loads a blank CSV cell as NaN (a float), which cannot be searched
    # with `in`; an organization without listed roles matches nothing.
    if not isinstance(org_roles, (str, list, tuple, set, frozenset)):
        return 5.0

    matched = sum(1 for role in role_pref if role in org_roles)
    return ((1-matched/len(role_pref))*5)

Example: here, the user has selected 3 preferences and 2/3 of the preferences are satisfied (Gardening/Harvesting, Cooking). Thus, on a scale from 0-5, the penalty is 1.6666. If all the user's preferences had been satisfied, the penalty would have been 0, and if none had been satisfied, the penalty would have been 5.

In [35]:
role_pref

['Cooking', 'Gardening/Harvesting', 'Cleaning']

In [36]:
organizations_df['Available Roles'][1]

'Gardening/Harvesting, Food preparation, Cooking'

In [37]:
getRoleDistance(role_pref, organizations_df['Available Roles'][1])

1.666666666666667

#### Organization Size Penalty 

Penalty ranges from 0-5, based on whether or not ANY of the user's size preferences match the organization's size. Thus, the penalty is 0 if any of the user's selected preferences match the organization's size. If none of the user's preferred organization sizes match the organization's size, the score is either 2.5 or 5, depending on how far off the organization's size is from the user's preferred size. For instance, if the user selects "small" but the organization is "large", then the penalty is 5, but if the user selects "medium" and the organization is "large", then the penalty is 2.5, based on the assumption that "medium" is closer to "large" than "small" is. 

In [38]:
#RULES: if all or some of the user's preferred organization sizes match the organization's size, return 0
#if none of the user's preferred organization's sizes match, return 2.5 if size is 1 increment away (i.e. small v medium), and 5 if size is 2 increments away (i.e. small vs. large)

def getOrgSizePrefDistance(org_size_pref, org_size):
    """Return the organization-size penalty (0, 2.5 or 5) for one organization.

    Args:
        org_size_pref: list of 'smallOrg' / 'mediumOrg' / 'largeOrg' tokens
            chosen by the user; an empty list or a list containing 'NoOrgPref'
            means "no preference".
        org_size: the organization's size: 'small', 'medium' or 'large'.

    Returns:
        0.0 if there is no preference or the organization's size is one of the
        preferred sizes, 2.5 if the closest preferred size is one step away,
        5.0 if it is two steps away (small vs. large).

    Raises:
        KeyError: if org_size is not 'small', 'medium' or 'large'.
    """
    # The sentinel arrives as an element of the list, so test membership;
    # comparing the whole list to the string is never true.
    if not org_size_pref or 'NoOrgPref' in org_size_pref:
        return 0.0

    orgSizeMap = {'small': 'smallOrg', 'medium': 'mediumOrg', 'large': 'largeOrg'}

    #if the organization's size is contained in one of the user's preference(s), return 0
    if orgSizeMap[org_size] in org_size_pref:
        return 0.0

    # No exact match from here on. If 'medium' is on either side, the closest
    # preferred size is a single step away; otherwise it is small vs. large.
    if 'mediumOrg' in org_size_pref or org_size == 'medium':
        return 2.5
    return 5.0

Example: here, the user has selected either a small or medium organization. The organization is small, so the penalty is 0.

In [39]:
org_size_pref

['smallOrg', 'mediumOrg']

In [40]:
organizations_df['Org size'][1]

'small'

In [41]:
getOrgSizePrefDistance(org_size_pref, organizations_df['Org size'][1])

0.0

#### Service Size Penalty 

Penalty ranges from 0-5, based on whether or not ANY of the user's service size preferences match the organization's service size. The logic for this function is identical to the organization size function. 

In [42]:
def getServiceSizePrefDistance(service_size_pref, service_size):
    """Return the service-size penalty (0, 2.5 or 5) for one organization.

    Args:
        service_size_pref: list of 'smallService' / 'mediumService' /
            'largeService' tokens chosen by the user; an empty list or a list
            containing 'NoServicePref' means "no preference".
        service_size: the organization's service size: 'small', 'medium' or 'large'.

    Returns:
        0.0 if there is no preference or the service size is one of the
        preferred sizes, 2.5 if the closest preferred size is one step away,
        5.0 if it is two steps away (small vs. large).

    Raises:
        KeyError: if service_size is not 'small', 'medium' or 'large'.
    """
    # The sentinel arrives as an element of the list, so test membership;
    # comparing the whole list to the string is never true.
    if not service_size_pref or 'NoServicePref' in service_size_pref:
        return 0.0

    serviceSizeMap = {'small': 'smallService', 'medium': 'mediumService', 'large': 'largeService'}

    #if the service's size is contained in one of the user's preference(s), return 0
    if serviceSizeMap[service_size] in service_size_pref:
        return 0.0

    # No exact match from here on. If 'medium' is on either side, the closest
    # preferred size is a single step away; otherwise it is small vs. large.
    if 'mediumService' in service_size_pref or service_size == 'medium':
        return 2.5
    return 5.0


Example: here, the user selects that they prefer either a small or medium service. The organization's service size is large, and since medium is close to large, the penalty is 2.5. If the user had only selected small service, then the penalty would have been 5.

In [48]:
service_size_pref

['smallService', 'mediumService']

In [49]:
organizations_df['Service size'][3]

'large'

In [51]:
getServiceSizePrefDistance(service_size_pref, organizations_df['Service size'][3])

2.5

#### Aggregate Distances Function

Here, the 4 distance penalties are added, and an aggregate score is returned.

In [55]:
def distance(location_pref, role_pref, org_size_pref, service_size_pref, organizations_df):
    """Sum the four preference penalties for a single organization.

    Despite its name, ``organizations_df`` is used here as ONE organization
    record (callers pass a row such as ``organizations_df.iloc[i]``); the
    'Location', 'Available Roles', 'Org size' and 'Service size' fields are read.

    Returns the aggregate penalty as a float; lower means a better match.
    """
    org = organizations_df
    location_penalty = getPhysicalDistance(location_pref, org['Location'])
    role_penalty = getRoleDistance(role_pref, org['Available Roles'])
    org_size_penalty = getOrgSizePrefDistance(org_size_pref, org['Org size'])
    service_size_penalty = getServiceSizePrefDistance(service_size_pref, org['Service size'])

    return 0.0 + location_penalty + role_penalty + org_size_penalty + service_size_penalty

In [56]:
distance(location_pref, role_pref, org_size_pref, service_size_pref, organizations_df.iloc[1])

2.666666666666667

## Final Ranking Function

Here, we loop through the organizations and run the distance function for each one, given the user's input preferences. Thus, each organization will be assigned an aggregate penalty score. Then, the organizations will be ranked, and the top 5 organizations with the lowest score will be output.

In [59]:
def finalRanking(location_pref, role_pref, org_size_pref, service_size_pref, organizations_df, top_n=5):
    """Return the names of the best-matching organizations, best first.

    Every row of ``organizations_df`` is scored with ``distance`` and the rows
    are ordered by ascending penalty; ties keep their original row order.

    Args:
        location_pref, role_pref, org_size_pref, service_size_pref: the user's
            preference lists, passed through to ``distance``.
        organizations_df: DataFrame of organizations; needs a 'Name' column
            plus the columns read by ``distance``.
        top_n: maximum number of names to return (default 5).

    Returns:
        A list of at most ``top_n`` organization names. It is shorter when the
        table holds fewer organizations, instead of raising IndexError.
    """
    scored = [
        (pos, distance(location_pref, role_pref, org_size_pref, service_size_pref, organizations_df.iloc[pos]))
        for pos in range(len(organizations_df))
    ]
    # list.sort is stable, so equal penalties keep the table order
    scored.sort(key=operator.itemgetter(1))

    # Look the name up by column label rather than by position, so that a
    # reordered or extended table cannot silently return another column.
    names = organizations_df['Name']
    return [names.iloc[pos] for pos, _ in scored[:top_n]]

In [60]:
finalRanking(location_pref, role_pref, org_size_pref, service_size_pref, organizations_df)

['Le PAS de la Rue',
 'APRMM',
 'Midnight Kitchen',
 'Le Chic Resto Pop',
 'Maison de Quartier Villeray']

## Final Code 

In [62]:
# Stand-in for the database: the three tables are read from CSV exports that
# are expected in the current working directory.
organizations_df = pd.read_csv('Organizations_DB.csv')
locations_df = pd.read_csv('Locations_DB.csv')
roles_df = pd.read_csv('Available_Roles_DB.csv')

# replace non-breaking spaces (U+00A0) with regular spaces so names compare equal
organizations_df.replace('\xa0', ' ', regex=True, inplace=True)
# NOTE: locations_df is rebound from a DataFrame to a plain list here
locations_df = locations_df['Organization Location'].tolist()
all_locations = [i.replace('\xa0',' ') for i in locations_df]
all_roles = list(roles_df['Available Roles'])

# lists of the valid input tokens for the organization-size and service-size preferences
all_org_sizes = ['smallOrg', 'mediumOrg' ,'largeOrg']
all_service_sizes = ['smallService', 'mediumService' ,'largeService']

# matrix that maps two locations to their physical distance
# Rows and columns follow the order of all_locations; only the entries with
# row index < column index are filled in (upper triangle), the rest are 0.
# NOTE(review): there are 18 rows but 19 columns (one per entry of all_locations).
# Rows 0-10 start with row+1 zeros, but the 12th row starts with 13 zeros, so the
# row for location index 11 appears to be missing and every later row is read one
# position early -- TODO confirm against the source data and restore the row.
distMatrix = [[0, 1, 2, 3, 4, 5, 5, 4, 3, 4, 5, 4, 4, 4, 3, 4, 4, 3, 2],
              [0, 0, 1, 1, 2, 4, 3, 3, 2, 2, 4, 3, 3, 3, 2, 3, 3, 3, 2],
              [0, 0, 0, 1, 2, 3, 3, 2, 1, 1, 2, 1, 1, 2, 1, 2, 2, 2, 1],
              [0, 0, 0, 0, 1, 2, 2, 1, 1, 2, 3, 2, 2, 3, 2, 3, 3, 3, 2],
              [0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 4, 4, 4, 4],
              [0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 5, 5, 5],
              [0, 0, 0, 0, 0, 0, 0, 1, 2, 2, 1, 2, 3, 3, 4, 4, 4, 5, 5],
              [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5],
              [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 1, 1, 2, 1, 3, 3, 4, 3],
              [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 3, 3, 4, 4],
              [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 1, 2, 2, 3, 4, 4],
              [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 3, 3],
              [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 3],
              [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 1, 1],
              [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2],
              [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2],
              [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
              [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]

In [66]:
def getPhysicalDistance(location_pref, org_location):
    """Return the location penalty between the user's boroughs and an organization.

    Args:
        location_pref: list of borough names chosen by the user; an empty list
            or a list containing 'NoLocationPref' means "no preference".
        org_location: borough name of the organization.

    Returns:
        0 when there is no preference, otherwise the smallest distMatrix entry
        between any chosen borough and the organization's borough.

    Raises:
        ValueError: if a borough name is not present in all_locations.
    """
    # The sentinel arrives as an element of the list, so test membership;
    # comparing the whole list to the string is never true.
    if not location_pref or 'NoLocationPref' in location_pref:
        return 0

    org_idx = all_locations.index(org_location)
    penalties = []
    for chosen in location_pref:
        pref_idx = all_locations.index(chosen)
        if pref_idx == org_idx:
            # Same borough: distance is 0. Skipping the lookup also avoids
            # indexing a diagonal row that distMatrix does not contain.
            penalties.append(0)
            continue
        # distMatrix is only filled where row < column, so order the pair.
        row, col = sorted((pref_idx, org_idx))
        penalties.append(distMatrix[row][col])
    return min(penalties)

def getRoleDistance(role_pref, org_roles):
    """Return the role penalty (0-5) for one organization.

    Args:
        role_pref: list of role names chosen by the user; an empty list or a
            list containing 'NoRolePref' means "no preference".
        org_roles: the organization's roles, normally the comma-separated text
            of the 'Available Roles' column (a collection of names also works).

    Returns:
        0.0 when there is no preference, otherwise 5 times the fraction of the
        preferred roles that the organization does not offer
        (e.g. 2 of 3 roles matched -> 1.67).
    """
    # The sentinel arrives as an element of the list, so test membership;
    # comparing the whole list to the string is never true.
    if not role_pref or 'NoRolePref' in role_pref:
        return 0.0

    # pandas loads a blank CSV cell as NaN (a float), which cannot be searched
    # with `in`; an organization without listed roles matches nothing.
    if not isinstance(org_roles, (str, list, tuple, set, frozenset)):
        return 5.0

    matched = sum(1 for role in role_pref if role in org_roles)
    return ((1-matched/len(role_pref))*5)


def getOrgSizePrefDistance(org_size_pref, org_size):
    """Return the organization-size penalty (0, 2.5 or 5) for one organization.

    Args:
        org_size_pref: list of 'smallOrg' / 'mediumOrg' / 'largeOrg' tokens
            chosen by the user; an empty list or a list containing 'NoOrgPref'
            means "no preference".
        org_size: the organization's size: 'small', 'medium' or 'large'.

    Returns:
        0.0 if there is no preference or the organization's size is one of the
        preferred sizes, 2.5 if the closest preferred size is one step away,
        5.0 if it is two steps away (small vs. large).

    Raises:
        KeyError: if org_size is not 'small', 'medium' or 'large'.
    """
    # The sentinel arrives as an element of the list, so test membership;
    # comparing the whole list to the string is never true.
    if not org_size_pref or 'NoOrgPref' in org_size_pref:
        return 0.0

    orgSizeMap = {'small': 'smallOrg', 'medium': 'mediumOrg', 'large': 'largeOrg'}

    #if the organization's size is contained in one of the user's preference(s), return 0
    if orgSizeMap[org_size] in org_size_pref:
        return 0.0

    # No exact match from here on. If 'medium' is on either side, the closest
    # preferred size is a single step away; otherwise it is small vs. large.
    if 'mediumOrg' in org_size_pref or org_size == 'medium':
        return 2.5
    return 5.0

def getServiceSizePrefDistance(service_size_pref, service_size):
    """Return the service-size penalty (0, 2.5 or 5) for one organization.

    Args:
        service_size_pref: list of 'smallService' / 'mediumService' /
            'largeService' tokens chosen by the user; an empty list or a list
            containing 'NoServicePref' means "no preference".
        service_size: the organization's service size: 'small', 'medium' or 'large'.

    Returns:
        0.0 if there is no preference or the service size is one of the
        preferred sizes, 2.5 if the closest preferred size is one step away,
        5.0 if it is two steps away (small vs. large).

    Raises:
        KeyError: if service_size is not 'small', 'medium' or 'large'.
    """
    # The sentinel arrives as an element of the list, so test membership;
    # comparing the whole list to the string is never true.
    if not service_size_pref or 'NoServicePref' in service_size_pref:
        return 0.0

    serviceSizeMap = {'small': 'smallService', 'medium': 'mediumService', 'large': 'largeService'}

    #if the service's size is contained in one of the user's preference(s), return 0
    if serviceSizeMap[service_size] in service_size_pref:
        return 0.0

    # No exact match from here on. If 'medium' is on either side, the closest
    # preferred size is a single step away; otherwise it is small vs. large.
    if 'mediumService' in service_size_pref or service_size == 'medium':
        return 2.5
    return 5.0

def distance(location_pref, role_pref, org_size_pref, service_size_pref, organizations_df):
    """Sum the four preference penalties for a single organization.

    Despite its name, ``organizations_df`` is used here as ONE organization
    record (callers pass a row such as ``organizations_df.iloc[i]``); the
    'Location', 'Available Roles', 'Org size' and 'Service size' fields are read.

    Returns the aggregate penalty as a float; lower means a better match.
    """
    org = organizations_df
    location_penalty = getPhysicalDistance(location_pref, org['Location'])
    role_penalty = getRoleDistance(role_pref, org['Available Roles'])
    org_size_penalty = getOrgSizePrefDistance(org_size_pref, org['Org size'])
    service_size_penalty = getServiceSizePrefDistance(service_size_pref, org['Service size'])

    return 0.0 + location_penalty + role_penalty + org_size_penalty + service_size_penalty

def finalRanking(location_pref, role_pref, org_size_pref, service_size_pref, organizations_df, top_n=5):
    """Return the names of the best-matching organizations, best first.

    Every row of ``organizations_df`` is scored with ``distance`` and the rows
    are ordered by ascending penalty; ties keep their original row order.

    Args:
        location_pref, role_pref, org_size_pref, service_size_pref: the user's
            preference lists, passed through to ``distance``.
        organizations_df: DataFrame of organizations; needs a 'Name' column
            plus the columns read by ``distance``.
        top_n: maximum number of names to return (default 5).

    Returns:
        A list of at most ``top_n`` organization names. It is shorter when the
        table holds fewer organizations, instead of raising IndexError.
    """
    scored = [
        (pos, distance(location_pref, role_pref, org_size_pref, service_size_pref, organizations_df.iloc[pos]))
        for pos in range(len(organizations_df))
    ]
    # list.sort is stable, so equal penalties keep the table order
    scored.sort(key=operator.itemgetter(1))

    # Look the name up by column label rather than by position, so that a
    # reordered or extended table cannot silently return another column.
    names = organizations_df['Name']
    return [names.iloc[pos] for pos, _ in scored[:top_n]]

def readDescription(orgName):
    '''Return the description (string) of the given organization.

    The text is read from 'static/media/description/<orgName>.txt', resolved
    against the current working directory.

    Raises:
        OSError: if the file cannot be opened (e.g. FileNotFoundError).
    '''

    descriptionAddress = 'static/media/description/' + orgName + '.txt'
    # `with` closes the handle even when read() raises
    with open(descriptionAddress, 'r') as f:
        return f.read()


def predict(inp=None):
    """Rank the organizations for a set of user choices.

    Args:
        inp: iterable of selected option tokens (locations, roles, size tokens
            or the 'No...Pref' sentinels). When omitted, a fixed demonstration
            selection is used as a placeholder for the real user input.

    Returns:
        A tuple ``(names, descriptions, links, names_address)`` of parallel
        lists for the top-ranked organizations; ``names_address`` holds the
        lower-cased names with spaces replaced by underscores.

    Raises:
        ValueError: from ``Series.item()`` if a ranked name does not match
            exactly one row of organizations_df.
    """
    if inp is None:
        #placeholder for now, later this will come from the user inputs
        inp = ['Verdun', 'Saint-Laurent', 'Ville-Marie','smallOrg', 'mediumOrg', 'smallService', 'mediumService', 'Cooking' ,'Gardening/Harvesting' ,'Cleaning']

    #blank if no input is received
    location_pref = []
    role_pref = []
    org_size_pref = []
    service_size_pref = []

    # verify and process inputs: route every token to its category; tokens
    # that match no category are dropped
    for raw in inp:
        choice = str(raw)
        if choice in all_locations or choice == 'NoLocationPref': #location
            location_pref.append(choice)
        elif choice in all_roles or choice == 'NoRolePref': #role
            role_pref.append(choice)
        elif choice in all_org_sizes or choice == 'NoOrgPref': #org size
            org_size_pref.append(choice)
        elif choice in all_service_sizes or choice == 'NoServicePref': #service size
            service_size_pref.append(choice)

    names = finalRanking(location_pref, role_pref, org_size_pref, service_size_pref, organizations_df)

    #extracting the description and link of the ranked orgs from organizations_df
    descriptions = []
    links = []
    for name in names:
        is_match = organizations_df['Name'] == name
        descriptions.append(organizations_df.loc[is_match, 'Brief description'].item())
        links.append(organizations_df.loc[is_match, 'Link'].item())

    # replacing all spaces in names by underscore to open org file
    names_address = [name.lower().replace(' ', '_') for name in names]

    return names, descriptions, links, names_address

In [68]:
predict()

(['Le PAS de la Rue',
  'APRMM',
  'Midnight Kitchen',
  'Le Chic Resto Pop',
  'Maison de Quartier Villeray'],
 ['Le PAS de la rue is a non-profit that, for more than twenty years, helps, accompanies and supports people 55 and older who are homeless, in severe economic precariousness or in social isolation.',
  'Since 1986, the Association des popotes roulantes du Montréal Métropolitain (APRMM) has been coordinating a network of entirely volunteer run popotes. ',
  'Midnight Kitchen is a non-profit, worker and volunteer-run collective that operates out of tio’tia:ke (unceded kanien’kehá:ka territory) dedicated to providing accessible food to as many people as possible.',
  'The organization offers, through its socially responsible restaurant, nutritious, quality meals at low prices to meet the needs of people experiencing food insecurity. It is also a welcoming living environment, open to all, which promotes social diversity',
  'Since 1989, It offers activities stimulating mutual aid

## Final Code with Flask

In [None]:
import numpy as np
import pandas as pd
import operator
from flask import Flask, request, jsonify, render_template, url_for
import pickle

# the Flask application object that the route decorators in this cell attach to
app = Flask(__name__)


# Stand-in for the database: the three tables are read from CSV exports that
# are expected in the current working directory.
organizations_df = pd.read_csv('Organizations_DB.csv')
locations_df = pd.read_csv('Locations_DB.csv')
roles_df = pd.read_csv('Available_Roles_DB.csv')

# replace non-breaking spaces (U+00A0) with regular spaces so names compare equal
organizations_df.replace('\xa0', ' ', regex=True, inplace=True)
# NOTE: locations_df is rebound from a DataFrame to a plain list here
locations_df = locations_df['Organization Location'].tolist()
all_locations = [i.replace('\xa0',' ') for i in locations_df]
all_roles = list(roles_df['Available Roles'])

# lists of the valid input tokens for the organization-size and service-size preferences
all_org_sizes = ['smallOrg', 'mediumOrg' ,'largeOrg']
all_service_sizes = ['smallService', 'mediumService' ,'largeService']

# matrix that maps two locations to their physical distance
# Rows and columns follow the order of all_locations; only the entries with
# row index < column index are filled in (upper triangle), the rest are 0.
# NOTE(review): there are 18 rows but 19 columns (one per entry of all_locations).
# Rows 0-10 start with row+1 zeros, but the 12th row starts with 13 zeros, so the
# row for location index 11 appears to be missing and every later row is read one
# position early -- TODO confirm against the source data and restore the row.
distMatrix = [[0, 1, 2, 3, 4, 5, 5, 4, 3, 4, 5, 4, 4, 4, 3, 4, 4, 3, 2],
              [0, 0, 1, 1, 2, 4, 3, 3, 2, 2, 4, 3, 3, 3, 2, 3, 3, 3, 2],
              [0, 0, 0, 1, 2, 3, 3, 2, 1, 1, 2, 1, 1, 2, 1, 2, 2, 2, 1],
              [0, 0, 0, 0, 1, 2, 2, 1, 1, 2, 3, 2, 2, 3, 2, 3, 3, 3, 2],
              [0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 4, 4, 4, 4],
              [0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 5, 5, 5],
              [0, 0, 0, 0, 0, 0, 0, 1, 2, 2, 1, 2, 3, 3, 4, 4, 4, 5, 5],
              [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5],
              [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 1, 1, 2, 1, 3, 3, 4, 3],
              [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 3, 3, 4, 4],
              [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 1, 2, 2, 3, 4, 4],
              [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 3, 3],
              [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 3],
              [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 1, 1],
              [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2],
              [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2],
              [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
              [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]

def getPhysicalDistance(location_pref, org_location):
    """Return the location penalty between the user's boroughs and an organization.

    Args:
        location_pref: list of borough names chosen by the user; an empty list
            or a list containing 'NoLocationPref' means "no preference".
        org_location: borough name of the organization.

    Returns:
        0 when there is no preference, otherwise the smallest distMatrix entry
        between any chosen borough and the organization's borough.

    Raises:
        ValueError: if a borough name is not present in all_locations.
    """
    # The sentinel arrives as an element of the list, so test membership;
    # comparing the whole list to the string is never true.
    if not location_pref or 'NoLocationPref' in location_pref:
        return 0

    org_idx = all_locations.index(org_location)
    penalties = []
    for chosen in location_pref:
        pref_idx = all_locations.index(chosen)
        if pref_idx == org_idx:
            # Same borough: distance is 0. Skipping the lookup also avoids
            # indexing a diagonal row that distMatrix does not contain.
            penalties.append(0)
            continue
        # distMatrix is only filled where row < column, so order the pair.
        row, col = sorted((pref_idx, org_idx))
        penalties.append(distMatrix[row][col])
    return min(penalties)

def getRoleDistance(role_pref, org_roles):
    """Return the role penalty (0-5) for one organization.

    Args:
        role_pref: list of role names chosen by the user; an empty list or a
            list containing 'NoRolePref' means "no preference".
        org_roles: the organization's roles, normally the comma-separated text
            of the 'Available Roles' column (a collection of names also works).

    Returns:
        0.0 when there is no preference, otherwise 5 times the fraction of the
        preferred roles that the organization does not offer
        (e.g. 2 of 3 roles matched -> 1.67).
    """
    # The sentinel arrives as an element of the list, so test membership;
    # comparing the whole list to the string is never true.
    if not role_pref or 'NoRolePref' in role_pref:
        return 0.0

    # pandas loads a blank CSV cell as NaN (a float), which cannot be searched
    # with `in`; an organization without listed roles matches nothing.
    if not isinstance(org_roles, (str, list, tuple, set, frozenset)):
        return 5.0

    matched = sum(1 for role in role_pref if role in org_roles)
    return ((1-matched/len(role_pref))*5)


def getOrgSizePrefDistance(org_size_pref, org_size):
    """Return the organization-size penalty (0, 2.5 or 5) for one organization.

    Args:
        org_size_pref: list of 'smallOrg' / 'mediumOrg' / 'largeOrg' tokens
            chosen by the user; an empty list or a list containing 'NoOrgPref'
            means "no preference".
        org_size: the organization's size: 'small', 'medium' or 'large'.

    Returns:
        0.0 if there is no preference or the organization's size is one of the
        preferred sizes, 2.5 if the closest preferred size is one step away,
        5.0 if it is two steps away (small vs. large).

    Raises:
        KeyError: if org_size is not 'small', 'medium' or 'large'.
    """
    # The sentinel arrives as an element of the list, so test membership;
    # comparing the whole list to the string is never true.
    if not org_size_pref or 'NoOrgPref' in org_size_pref:
        return 0.0

    orgSizeMap = {'small': 'smallOrg', 'medium': 'mediumOrg', 'large': 'largeOrg'}

    #if the organization's size is contained in one of the user's preference(s), return 0
    if orgSizeMap[org_size] in org_size_pref:
        return 0.0

    # No exact match from here on. If 'medium' is on either side, the closest
    # preferred size is a single step away; otherwise it is small vs. large.
    if 'mediumOrg' in org_size_pref or org_size == 'medium':
        return 2.5
    return 5.0

def getServiceSizePrefDistance(service_size_pref, service_size):
    """Return the service-size penalty (0, 2.5 or 5) for one organization.

    Args:
        service_size_pref: list of 'smallService' / 'mediumService' /
            'largeService' tokens chosen by the user; an empty list or a list
            containing 'NoServicePref' means "no preference".
        service_size: the organization's service size: 'small', 'medium' or 'large'.

    Returns:
        0.0 if there is no preference or the service size is one of the
        preferred sizes, 2.5 if the closest preferred size is one step away,
        5.0 if it is two steps away (small vs. large).

    Raises:
        KeyError: if service_size is not 'small', 'medium' or 'large'.
    """
    # The sentinel arrives as an element of the list, so test membership;
    # comparing the whole list to the string is never true.
    if not service_size_pref or 'NoServicePref' in service_size_pref:
        return 0.0

    serviceSizeMap = {'small': 'smallService', 'medium': 'mediumService', 'large': 'largeService'}

    #if the service's size is contained in one of the user's preference(s), return 0
    if serviceSizeMap[service_size] in service_size_pref:
        return 0.0

    # No exact match from here on. If 'medium' is on either side, the closest
    # preferred size is a single step away; otherwise it is small vs. large.
    if 'mediumService' in service_size_pref or service_size == 'medium':
        return 2.5
    return 5.0

def distance(location_pref, role_pref, org_size_pref, service_size_pref, organizations_df):
    """Sum the four preference penalties for a single organization.

    Despite its name, ``organizations_df`` is used here as ONE organization
    record (callers pass a row such as ``organizations_df.iloc[i]``); the
    'Location', 'Available Roles', 'Org size' and 'Service size' fields are read.

    Returns the aggregate penalty as a float; lower means a better match.
    """
    org = organizations_df
    location_penalty = getPhysicalDistance(location_pref, org['Location'])
    role_penalty = getRoleDistance(role_pref, org['Available Roles'])
    org_size_penalty = getOrgSizePrefDistance(org_size_pref, org['Org size'])
    service_size_penalty = getServiceSizePrefDistance(service_size_pref, org['Service size'])

    return 0.0 + location_penalty + role_penalty + org_size_penalty + service_size_penalty

def finalRanking(location_pref, role_pref, org_size_pref, service_size_pref, organizations_df, top_n=5):
    """Return the names of the best-matching organizations, best first.

    Every row of ``organizations_df`` is scored with ``distance`` and the rows
    are ordered by ascending penalty; ties keep their original row order.

    Args:
        location_pref, role_pref, org_size_pref, service_size_pref: the user's
            preference lists, passed through to ``distance``.
        organizations_df: DataFrame of organizations; needs a 'Name' column
            plus the columns read by ``distance``.
        top_n: maximum number of names to return (default 5).

    Returns:
        A list of at most ``top_n`` organization names. It is shorter when the
        table holds fewer organizations, instead of raising IndexError.
    """
    scored = [
        (pos, distance(location_pref, role_pref, org_size_pref, service_size_pref, organizations_df.iloc[pos]))
        for pos in range(len(organizations_df))
    ]
    # list.sort is stable, so equal penalties keep the table order
    scored.sort(key=operator.itemgetter(1))

    # Look the name up by column label rather than by position, so that a
    # reordered or extended table cannot silently return another column.
    names = organizations_df['Name']
    return [names.iloc[pos] for pos, _ in scored[:top_n]]

def readDescription(orgName):
    '''Return the description (string) of the given organization.

    The text is read from 'static/media/description/<orgName>.txt', resolved
    against the current working directory.

    Raises:
        OSError: if the file cannot be opened (e.g. FileNotFoundError).
    '''

    descriptionAddress = 'static/media/description/' + orgName + '.txt'
    # `with` closes the handle even when read() raises
    with open(descriptionAddress, 'r') as f:
        return f.read()

@app.route('/')
def home():
    """Serve the landing page by rendering the 'index.html' template."""
    landing_template = 'index.html'
    return render_template(landing_template)

@app.route('/predict', methods=['POST'])
def predict():
    """Rank the organizations and render the results page.

    The user's choices are still a hard-coded placeholder. Each token is
    routed to its preference category, the organizations are ranked with
    ``finalRanking`` and the 'topOrgsPreset.html' template is rendered with
    the names, URL-friendly names, descriptions and links of the top matches.

    Raises:
        ValueError: from ``Series.item()`` if a ranked name does not match
            exactly one row of organizations_df.
    """
    #blank if no input is received
    location_pref = []
    role_pref = []
    org_size_pref = []
    service_size_pref = []

    #placeholder for now, later this will come from the user inputs
    inp = ['Verdun', 'Saint-Laurent', 'Ville-Marie','smallOrg', 'mediumOrg', 'smallService', 'mediumService', 'Cooking' ,'Gardening/Harvesting' ,'Cleaning']

    # verify and process inputs: route every token to its category; tokens
    # that match no category are dropped
    for raw in inp:
        choice = str(raw)
        if choice in all_locations or choice == 'NoLocationPref': #location
            location_pref.append(choice)
        elif choice in all_roles or choice == 'NoRolePref': #role
            role_pref.append(choice)
        elif choice in all_org_sizes or choice == 'NoOrgPref': #org size
            org_size_pref.append(choice)
        elif choice in all_service_sizes or choice == 'NoServicePref': #service size
            service_size_pref.append(choice)

    names = finalRanking(location_pref, role_pref, org_size_pref, service_size_pref, organizations_df)

    #extracting the description and link of the ranked orgs from organizations_df
    descriptions = []
    links = []
    for name in names:
        is_match = organizations_df['Name'] == name
        descriptions.append(organizations_df.loc[is_match, 'Brief description'].item())
        links.append(organizations_df.loc[is_match, 'Link'].item())

    # replacing all spaces in names by underscore to open org file
    names_address = [name.lower().replace(' ', '_') for name in names]

    # Render once, after ALL names were converted. The return used to sit inside
    # the loop and referenced the undefined name `names_adress`.
    # The template keyword `orgAdress` is kept as spelled: the template reads it.
    return render_template('topOrgsPreset.html', orgNames = names, orgAdress = names_address, descriptions = descriptions, links = links)

if __name__ == "__main__":
    import os

    # SECURITY: debug mode enables the interactive Werkzeug debugger, which lets
    # anyone who can reach the server run arbitrary Python. Keep it off unless
    # it is requested explicitly for local development (FLASK_DEBUG=1).
    app.run(debug=os.environ.get('FLASK_DEBUG') == '1')