## Imports

In [202]:
import numpy as np
import pandas as pd
import datetime
import math
import random

pd.options.display.max_columns = 50

In [203]:
import scipy.sparse as sparse
from scipy.sparse.linalg import spsolve
from sklearn.preprocessing import MinMaxScaler

In [3]:
# Plots
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
sns.set(color_codes=True)

In [24]:
def group_by_col_size(df,col):
    '''
    '''
    return df.groupby(col).size()

## Functions

In [4]:
def set_labels(freq, ax,horz_shift=.2,vert_shift=1000,w="bold",fsize=10,c="b"):
    '''
    Function
    --------
    Set Frequency Labels on Bar Plot
    
    Parameters
    ----------
    freq : List of frequencies to plot for each bar
    ax : Axis to Plot Frequency Labels on
    horz_shift : Shift labels left/right
    vert_shift : Shift labels up/down
    w : "weight" -> Bold by defauly
    fsize : fontsize
    c : label color
    
    '''
    for i, v in enumerate(freq):
        ax.text(i-horz_shift,v+vert_shift, str(round(v,2)), weight = w,fontsize=fsize,color = c)

## Donation Information

In [178]:
user_df = pd.read_csv('../data/Donations_File.csv', parse_dates=['Date'])

In [179]:
user_df = user_df[user_df['User ID'] > 0]

In [180]:
user_df['Event?'] = user_df['Event?'].apply(lambda x: False if math.isnan(x) else True)
user_df['Year'] = user_df['Date'].apply(lambda x: x.year)
user_df['Month'] = user_df['Date'].apply(lambda x: x.month)
user_df['Day'] = user_df['Date'].apply(lambda x: x.day)
user_df['Hour'] = user_df['Date'].apply(lambda x: x.hour)

In [181]:
user_df['Message to Charity2'] = user_df['Message to Charity2'].apply(lambda x: True if type(x) == str else False)
user_df['Message to Charity1'] = user_df['Message to Charity1'].apply(lambda x: True if type(x) == str else False)
user_df['Message'] = (user_df['Message to Charity2'] == True) | (user_df['Message to Charity1']==True)
user_df['Message'] = user_df['Message'].map({False:0,True:1})

In [182]:
user_df['Givz Everywhere?'] = user_df['Givz Everywehre?'].apply(lambda x: 1 if type(x) == str else 0)
user_df['Cover fee?'] = user_df['Cover fee?'].apply(lambda x: 1 if (x) == True else 0)
user_df['Event?'] = user_df['Event?'].apply(lambda x: 1 if (x) == True else 0)
user_df['In Honor Of?'] = user_df['In Honor Of?'].apply(lambda x: 1 if (x) == True else 0)

In [183]:
user_df['Recur_Monthly'] = user_df['Recurring Status'].apply(lambda x: 1 if (x) == "M" else 0)
user_df['Recur_Annually'] = user_df['Recurring Status'].apply(lambda x: 1 if (x) == "A" else 0)
user_df['Recur_O'] = user_df['Recurring Status'].apply(lambda x: 1 if (x) == "O" else 0)

In [184]:
user_df['Unnamed: 10'] = user_df['Unnamed: 10'].apply(lambda x: 1 if (x) == True else 0)

In [185]:
user_df['User ID'] = user_df['User ID'].apply(lambda x: int(x))

### user_df Drops

In [186]:
user_df.drop(columns = ['Message to Charity2','Message to Charity1','Recurring Status','Givz Everywehre?'], inplace = True)

### user_df Current Status

In [187]:
user_df.head()

Unnamed: 0,Donation ID,User ID,Amount,Date,To Charity,To Charity EIN,Advised Charity,Advised Charity EIN,Unnamed: 10,In Honor Of?,Event?,Cover fee?,Tip,Year,Month,Day,Hour,Message,Givz Everywhere?,Recur_Monthly,Recur_Annually,Recur_O
0,2753,2837,25.0,2019-09-11 03:17:52.392953-04:00,Social Good Fund,461323531,Room to Grow National Inc,134012096,0,0,0,0,0.0,2019,9,11,3,0,0,0,0,1
1,2752,1629,20.0,2019-09-10 22:55:52.758423-04:00,Social Good Fund,461323531,Wildlife Conservation Society,131740011,0,0,0,0,0.0,2019,9,10,22,0,0,1,0,0
2,2751,1134,21.0,2019-09-10 22:55:49.962729-04:00,Social Good Fund,461323531,Benchmark School,231728895,0,0,0,0,0.0,2019,9,10,22,0,0,1,0,0
3,2750,2566,140.0,2019-09-10 22:29:05.231148-04:00,Social Good Fund,461323531,Play Soccer to Give Corp,455071137,0,0,0,0,0.0,2019,9,10,22,0,1,0,0,1
4,2749,2847,1.0,2019-09-10 16:52:30.401878-04:00,Social Good Fund,461323531,Room to Grow National Inc,134012096,0,0,0,0,0.0,2019,9,10,16,0,0,0,0,1


### Cleaning User Information 

In [157]:
user_info_df = pd.read_csv('../data/user_donations_df_years.csv')
user_info_df.drop(columns=['Unnamed: 0'],inplace=True)

In [158]:
user_info_df['Gender_M'] = user_info_df['Gender'].apply(lambda x: 1 if (x) == "M" else 0)
user_info_df['Gender_F'] = user_info_df['Gender'].apply(lambda x: 1 if (x) == "F" else 0)

### user_info_df Drops

In [159]:
user_info_df.drop(columns = ['Gender','Birthday','Birth_Year','User_Age','Date created'], inplace = True)

### user_info_df Current Status

In [160]:
user_info_df.head()

Unnamed: 0,User ID,Date_Created_Year,Date_Created_Month,Account_Age,Gender_M,Gender_F
0,1078,2018,9,1.1,0,0
1,2555,2019,6,0.38,0,0
2,2792,2019,8,0.23,0,0
3,142,2017,12,1.89,0,0
4,369,2018,3,1.62,1,0


### Joining user_df with user_info_df

In [297]:
user_df_merged = user_df.merge(user_info_df, left_on='User ID',right_on='User ID' )

In [298]:
user_df_merged.head()

Unnamed: 0,Donation ID,User ID,Amount,Date,To Charity,To Charity EIN,Advised Charity,Advised Charity EIN,Unnamed: 10,In Honor Of?,Event?,Cover fee?,Tip,Year,Month,Day,Hour,Message,Givz Everywhere?,Recur_Monthly,Recur_Annually,Recur_O,Date_Created_Year,Date_Created_Month,Account_Age,Gender_M,Gender_F
0,2753,2837,25.0,2019-09-11 03:17:52.392953-04:00,Social Good Fund,461323531,Room to Grow National Inc,134012096,0,0,0,0,0.0,2019,9,11,3,0,0,0,0,1,2019,9,0.16,0,0
1,2752,1629,20.0,2019-09-10 22:55:52.758423-04:00,Social Good Fund,461323531,Wildlife Conservation Society,131740011,0,0,0,0,0.0,2019,9,10,22,0,0,1,0,0,2018,12,0.92,0,0
2,2703,1629,20.0,2019-08-10 22:55:53.127860-04:00,Social Good Fund,461323531,Wildlife Conservation Society,131740011,0,0,0,0,0.0,2019,8,10,22,0,0,1,0,0,2018,12,0.92,0,0
3,2631,1629,20.0,2019-07-10 22:55:52.018125-04:00,Social Good Fund,461323531,Wildlife Conservation Society,131740011,0,0,0,0,0.0,2019,7,10,22,0,0,1,0,0,2018,12,0.92,0,0
4,2448,1629,20.0,2019-06-10 22:55:52.357373-04:00,Social Good Fund,461323531,Wildlife Conservation Society,131740011,0,0,0,0,0.0,2019,6,10,22,0,0,1,0,0,2018,12,0.92,0,0


In [299]:
# # Fixing Specific Cases
user_df_merged['Advised Charity EIN'][1124] = '060726487'
user_df_merged['Advised Charity EIN'][1959] = '521285097'

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until


In [300]:
# user_df_merged['Advised Charity EIN'] = user_df_merged['Advised Charity EIN'].apply(lambda x: str(x))
user_df_merged['Advised Charity EIN'] = user_df_merged['Advised Charity EIN'].apply(lambda x: int(x.replace(',','')))

In [304]:
user_df_merged.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 2525 entries, 0 to 2524
Data columns (total 27 columns):
Donation ID            2525 non-null int64
User ID                2525 non-null int64
Amount                 2525 non-null float64
Date                   2525 non-null object
To Charity             2525 non-null object
To Charity EIN         2525 non-null int64
Advised Charity        2525 non-null object
Advised Charity EIN    2525 non-null int64
Unnamed: 10            2525 non-null int64
In Honor Of?           2525 non-null int64
Event?                 2525 non-null int64
Cover fee?             2525 non-null int64
Tip                    2525 non-null float64
Year                   2525 non-null int64
Month                  2525 non-null int64
Day                    2525 non-null int64
Hour                   2525 non-null int64
Message                2525 non-null int64
Givz Everywhere?       2525 non-null int64
Recur_Monthly          2525 non-null int64
Recur_Annually         252

## Adding in Charity Data

In [206]:
charity_df = pd.read_csv('../data/charity_info_categories.csv')
charity_df.drop(columns=['Unnamed: 0'],inplace=True)

In [210]:
charity_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1484617 entries, 0 to 1484616
Data columns (total 8 columns):
EIN                    1484617 non-null int64
NAME                   1484617 non-null object
INCOME_CD              1484617 non-null int64
ZIP_FIVE               1484617 non-null int64
NTEE_Major_Category    1484617 non-null object
NTEE_Minor_Category    1484617 non-null object
County                 1483480 non-null object
rating                 10476 non-null float64
dtypes: float64(1), int64(3), object(4)
memory usage: 90.6+ MB


In [207]:
charity_df.head()

Unnamed: 0,EIN,NAME,INCOME_CD,ZIP_FIVE,NTEE_Major_Category,NTEE_Minor_Category,County,rating
0,587764,IGLESIA BETHESDA INC,0,1852,"Religion-Related, Spiritual Development",Protestant,Middlesex County,
1,635913,MINISTERIO APOSTOLICO JESUCRISTO ES EL SENOR INC,0,1840,"Religion-Related, Spiritual Development",Protestant,Essex County,
2,765634,MERCY CHAPEL INTERNATIONAL,0,2126,"Religion-Related, Spiritual Development",Christian,Suffolk County,
3,841363,AGAPE HOUSE OF PRAYER,0,2126,"Religion-Related, Spiritual Development",Christian,Suffolk County,
4,852649,BETHANY PRESBYTERIAN CHURCH,0,2445,"Religion-Related, Spiritual Development",Christian,Norfolk County,


In [306]:
user_charity_df_merged = user_df_merged.merge(charity_df, left_on='Advised Charity EIN',right_on='EIN')

In [311]:
user_charity_df_merged.drop(columns=['EIN','NAME'],inplace=True)

In [312]:
user_charity_df_merged.head()

Unnamed: 0,Donation ID,User ID,Amount,Date,To Charity,To Charity EIN,Advised Charity,Advised Charity EIN,Unnamed: 10,In Honor Of?,Event?,Cover fee?,Tip,Year,Month,Day,Hour,Message,Givz Everywhere?,Recur_Monthly,Recur_Annually,Recur_O,Date_Created_Year,Date_Created_Month,Account_Age,Gender_M,Gender_F,INCOME_CD,ZIP_FIVE,NTEE_Major_Category,NTEE_Minor_Category,County,rating
0,2753,2837,25.0,2019-09-11 03:17:52.392953-04:00,Social Good Fund,461323531,Room to Grow National Inc,134012096,0,0,0,0,0.0,2019,9,11,3,0,0,0,0,1,2019,9,0.16,0,0,7,10001,Human Services - Multipurpose and Other,"Children's, Youth Services",New York County,4.0
1,2749,2847,1.0,2019-09-10 16:52:30.401878-04:00,Social Good Fund,461323531,Room to Grow National Inc,134012096,0,0,0,0,0.0,2019,9,10,16,0,0,0,0,1,2019,9,0.15,0,0,7,10001,Human Services - Multipurpose and Other,"Children's, Youth Services",New York County,4.0
2,2746,2845,1.0,2019-09-09 17:17:28.541816-04:00,Social Good Fund,461323531,Room to Grow National Inc,134012096,0,0,0,0,0.0,2019,9,9,17,0,0,0,0,1,2019,9,0.15,0,0,7,10001,Human Services - Multipurpose and Other,"Children's, Youth Services",New York County,4.0
3,2752,1629,20.0,2019-09-10 22:55:52.758423-04:00,Social Good Fund,461323531,Wildlife Conservation Society,131740011,0,0,0,0,0.0,2019,9,10,22,0,0,1,0,0,2018,12,0.92,0,0,9,10460,Animal-Related,D500,Bronx County,4.0
4,2703,1629,20.0,2019-08-10 22:55:53.127860-04:00,Social Good Fund,461323531,Wildlife Conservation Society,131740011,0,0,0,0,0.0,2019,8,10,22,0,0,1,0,0,2018,12,0.92,0,0,9,10460,Animal-Related,D500,Bronx County,4.0


## Implicit Collaborative Filtering

In [410]:
raw_data = user_charity_df_merged[['User ID','Advised Charity','Amount']]

In [411]:
# Drop Rows with Missing Values
data = raw_data.dropna()

In [413]:
# Convert charity names into numerical IDs
data['user_id'] = data['User ID'].astype("category").cat.codes
data['charity_id'] = data['Advised Charity'].astype("category").cat.codes

In [417]:
# Create a lookup frame so we can get the charity names back in 
# readable form later.
item_lookup = data[['charity_id', 'Advised Charity']].drop_duplicates()
item_lookup['charity_id'] = item_lookup['charity_id'].astype(str)

In [418]:
item_lookup

Unnamed: 0,charity_id,Advised Charity
0,439,Room to Grow National Inc
3,557,Wildlife Conservation Society
14,396,Play Soccer to Give Corp
85,280,Lemon Bay Junior Golf Foundation Inc
89,410,Purple Heart Homes Inc
...,...,...
2354,571,Zen Hospice Project Inc
2356,322,Muscular Dystrophy Association
2357,520,Treatment Advocacy Center
2358,7,Ajiri Foundation


In [373]:
data = data.drop(['User ID', 'Advised Charity'], axis=1)

In [374]:
# Drop Rows with $0 Donation
data = data.loc[data.Amount > 0]

In [375]:
# Create lists of all users, charities and amounts
users = list(np.sort(data['user_id'].unique()))
charities = list(np.sort(data['charity_id'].unique()))
amounts = list(data.Amount)

In [376]:
# Get the rows and columns for our new matrix
rows = data['user_id'].astype(int)
cols = data['charity_id'].astype(int)

In [377]:
# Contruct a sparse matrix for our users and items containing amounts
data_sparse = sparse.csr_matrix((amounts, (rows, cols)), shape=(len(users), len(charities)))

In [378]:
def implicit_als(sparse_data, alpha_val=40, iterations=10, lambda_val=0.1, features=10):
    """ Implementation of Alternating Least Squares with implicit data. We iteratively
    compute the user (x_u) and item (y_i) vectors using the following formulas:
 
    x_u = ((Y.T*Y + Y.T*(Cu - I) * Y) + lambda*I)^-1 * (X.T * Cu * p(u))
    y_i = ((X.T*X + X.T*(Ci - I) * X) + lambda*I)^-1 * (Y.T * Ci * p(i))
 
    Args:
        sparse_data (csr_matrix): Our sparse user-by-item matrix
 
        alpha_val (int): The rate in which we'll increase our confidence
        in a preference with more interactions.
 
        iterations (int): How many times we alternate between fixing and 
        updating our user and item vectors
 
        lambda_val (float): Regularization value
 
        features (int): How many latent features we want to compute.
    
    Returns:     
        X (csr_matrix): user vectors of size users-by-features
        
        Y (csr_matrix): item vectors of size items-by-features
     """

    # Calculate the foncidence for each value in our data
    confidence = sparse_data * alpha_val
    
    # Get the size of user rows and item columns
    user_size, item_size = sparse_data.shape
    
    # We create the user vectors X of size users-by-features, the item vectors
    # Y of size items-by-features and randomly assign the values.
    X = sparse.csr_matrix(np.random.normal(size = (user_size, features)))
    Y = sparse.csr_matrix(np.random.normal(size = (item_size, features)))
    
    #Precompute I and lambda * I
    X_I = sparse.eye(user_size)
    Y_I = sparse.eye(item_size)
    
    I = sparse.eye(features)
    lI = lambda_val * I
    
    """ Continuation of implicit_als function"""

    # Start main loop. For each iteration we first compute X and then Y
    for i in range(iterations):
        print ('iteration %d of %d' % (i+1, iterations))
        
        # Precompute Y-transpose-Y and X-transpose-X
        yTy = Y.T.dot(Y)
        xTx = X.T.dot(X)

        # Loop through all users
        for u in range(user_size):

            # Get the user row.
            u_row = confidence[u,:].toarray() 

            # Calculate the binary preference p(u)
            p_u = u_row.copy()
            p_u[p_u != 0] = 1.0

            # Calculate Cu and Cu - I
            CuI = sparse.diags(u_row, [0])
            Cu = CuI + Y_I

            # Put it all together and compute the final formula
            yT_CuI_y = Y.T.dot(CuI).dot(Y)
            yT_Cu_pu = Y.T.dot(Cu).dot(p_u.T)
            X[u] = spsolve(yTy + yT_CuI_y + lI, yT_Cu_pu)

    
        for i in range(item_size):

            # Get the item column and transpose it.
            i_row = confidence[:,i].T.toarray()

            # Calculate the binary preference p(i)
            p_i = i_row.copy()
            p_i[p_i != 0] = 1.0

            # Calculate Ci and Ci - I
            CiI = sparse.diags(i_row, [0])
            Ci = CiI + X_I

            # Put it all together and compute the final formula
            xT_CiI_x = X.T.dot(CiI).dot(X)
            xT_Ci_pi = X.T.dot(Ci).dot(p_i.T)
            Y[i] = spsolve(xTx + xT_CiI_x + lI, xT_Ci_pi)

    return X, Y
    

In [379]:
user_vecs, item_vecs = implicit_als(data_sparse, iterations=20, features=20, alpha_val=40)


iteration 1 of 20
iteration 2 of 20
iteration 3 of 20
iteration 4 of 20
iteration 5 of 20
iteration 6 of 20
iteration 7 of 20
iteration 8 of 20
iteration 9 of 20
iteration 10 of 20
iteration 11 of 20
iteration 12 of 20
iteration 13 of 20
iteration 14 of 20
iteration 15 of 20
iteration 16 of 20
iteration 17 of 20
iteration 18 of 20
iteration 19 of 20
iteration 20 of 20


In [382]:
item_lookup

Unnamed: 0,charity_id,Advised Charity
0,Room to Grow National Inc,Room to Grow National Inc
3,Wildlife Conservation Society,Wildlife Conservation Society
14,Play Soccer to Give Corp,Play Soccer to Give Corp
85,Lemon Bay Junior Golf Foundation Inc,Lemon Bay Junior Golf Foundation Inc
89,Purple Heart Homes Inc,Purple Heart Homes Inc
...,...,...
2354,Zen Hospice Project Inc,Zen Hospice Project Inc
2356,Muscular Dystrophy Association,Muscular Dystrophy Association
2357,Treatment Advocacy Center,Treatment Advocacy Center
2358,Ajiri Foundation,Ajiri Foundation


In [422]:
item_lookup

Unnamed: 0,charity_id,Advised Charity
0,439,Room to Grow National Inc
3,557,Wildlife Conservation Society
14,396,Play Soccer to Give Corp
85,280,Lemon Bay Junior Golf Foundation Inc
89,410,Purple Heart Homes Inc
...,...,...
2354,571,Zen Hospice Project Inc
2356,322,Muscular Dystrophy Association
2357,520,Treatment Advocacy Center
2358,7,Ajiri Foundation


'Muscular Dystrophy Association'

In [431]:
#------------------------------
# FIND SIMILAR ITEMS
#------------------------------

# Let's find similar charities to __________ (Use item_lookup to locate charity_id). 
item_id = 30

# Get the item row for selected charity.
item_vec = item_vecs[item_id].T

# Calculate the similarity score between selected charity and other charities
# and select the top 10 most similar.
scores = item_vecs.dot(item_vec).toarray().reshape(1,-1)[0]
top_10 = np.argsort(scores)[::-1][:10]

charities = []
charity_scores = []

print("Charities Similar to:", item_lookup['Advised Charity'].loc[item_lookup.charity_id == str(item_id)].iloc[0],"\n")

# Get and print the actual charity names and scores
for idx in top_10:
    charities.append(item_lookup['Advised Charity'].loc[item_lookup.charity_id == str(idx)].iloc[0])
    charity_scores.append(scores[idx])

similar = pd.DataFrame({'charities': charities, 'score': charity_scores})

print (similar)

Charities Similar to: American Society For The Prevention Of Cruelty To Animals 

                                           charities     score
0                                         Spirits Up  0.023737
1                               Hungry for Music Inc  0.023526
2                 International Rescue Committee Inc  0.023014
3  American Society For The Prevention Of Cruelty...  0.021541
4                       Zoe Rose Memorial Foundation  0.015883
5                                      Direct Relief  0.015722
6                      Innocence Project New Orleans  0.015137
7                      Wildlife Conservation Society  0.015073
8                                    Girls With Guts  0.015066
9                        African Wildlife Foundation  0.014334


In [441]:
#------------------------------
# CREATE USER RECOMMENDATIONS
#------------------------------

def recommend(user_id, data_sparse, user_vecs, item_vecs, item_lookup, num_items=10):
    """Recommend items for a given user given a trained model
    
    Args:
        user_id (int): The id of the user we want to create recommendations for.
        
        data_sparse (csr_matrix): Our original training data.
        
        user_vecs (csr_matrix): The trained user x features vectors
        
        item_vecs (csr_matrix): The trained item x features vectors
        
        item_lookup (pandas.DataFrame): Used to map charity_id to charity names
        
        num_items (int): How many recommendations we want to return:
        
    Returns:
        recommendations (pandas.DataFrame): DataFrame with num_items charity names and scores
    
    """
  
    # Get all interactions by the user
    user_interactions = data_sparse[user_id,:].toarray()

    # We don't want to recommend items the user has consumed. So let's
    # set them all to 0 and the unknowns to 1.
    user_interactions = user_interactions.reshape(-1) + 1 #Reshape to turn into 1D array
    user_interactions[user_interactions > 1] = 0

    # This is where we calculate the recommendation by taking the 
    # dot-product of the user vectors with the item vectors.
    rec_vector = user_vecs[user_id,:].dot(item_vecs.T).toarray()

    # Let's scale our scores between 0 and 1 to make it all easier to interpret.
    min_max = MinMaxScaler()
    rec_vector_scaled = min_max.fit_transform(rec_vector.reshape(-1,1))[:,0]
    recommend_vector = user_interactions*rec_vector_scaled
   
    # Get all the charity indices in order of recommendations (descending) and
    # select only the top "num_items" items. 
    item_idx = np.argsort(recommend_vector)[::-1][:num_items]

    charities = []
    scores = []

    # Loop through our recommended charity indicies and look up the actual charity name
    for idx in item_idx:
        charities.append(item_lookup['Advised Charity'].loc[item_lookup.charity_id == str(idx)].iloc[0])
        scores.append(recommend_vector[idx])

    # Create a new dataframe with recommended charity names and scores
    recommendations = pd.DataFrame({'charity': charities, 'score': scores})
    
    return recommendations

In [444]:
# Let's say we want to recommend charities for user
user_id = 12

#------------------------------
# GET ITEMS Donated to BY USER
#------------------------------

# Let's print out what the user has donated to
consumed_idx = data_sparse[user_id,:].nonzero()[1].astype(str)
consumed_items = item_lookup.loc[item_lookup.charity_id.isin(consumed_idx)]
print (consumed_items)


# Let's generate and print our recommendations
recommendations = recommend(user_id, data_sparse, user_vecs, item_vecs, item_lookup)
print("\n User-User Recommendations:")
print (recommendations)

     charity_id                         Advised Charity
1512        309  Memorial Sloan Kettering Cancer Center

 User-User Recommendations:
                                             charity     score
0                  Temple Beth El Of Northern Valley  0.484867
1                                   Social Good Fund  0.481867
2                             Purple Heart Homes Inc  0.452642
3  Alzheimers Disease And Related Disorders Assoc...  0.419054
4                                   Exhale to Inhale  0.416827
5                                 South Bronx United  0.399426
6                                  Tuesdays Children  0.394489
7                    Trustees for Harvard University  0.393587
8                               Exhale To Inhale Inc  0.381317
9            St Jude Childrens Research Hospital Inc  0.381023


## Machine Learning - Implicit Recommender

In [193]:
train_df = (user_df_merged[user_df_merged['Date'] < pd.to_datetime('2019-04-01 08:24:10.798807-04:00')])

In [194]:
test_df = (user_df_merged[user_df_merged['Date'] > pd.to_datetime('2019-04-01 08:24:10.798807-04:00')])

In [200]:
test_df.head(5)

Unnamed: 0,Donation ID,User ID,Amount,Date,To Charity,To Charity EIN,Advised Charity,Advised Charity EIN,Unnamed: 10,In Honor Of?,Event?,Cover fee?,Tip,Year,Month,Day,Hour,Message,Givz Everywhere?,Recur_Monthly,Recur_Annually,Recur_O,Date_Created_Year,Date_Created_Month,Account_Age,Gender_M,Gender_F
0,2753,2837,25.0,2019-09-11 03:17:52.392953-04:00,Social Good Fund,461323531,Room to Grow National Inc,134012096,0,0,0,0,0.0,2019,9,11,3,0,0,0,0,1,2019,9,0.16,0,0
1,2752,1629,20.0,2019-09-10 22:55:52.758423-04:00,Social Good Fund,461323531,Wildlife Conservation Society,131740011,0,0,0,0,0.0,2019,9,10,22,0,0,1,0,0,2018,12,0.92,0,0
2,2703,1629,20.0,2019-08-10 22:55:53.127860-04:00,Social Good Fund,461323531,Wildlife Conservation Society,131740011,0,0,0,0,0.0,2019,8,10,22,0,0,1,0,0,2018,12,0.92,0,0
3,2631,1629,20.0,2019-07-10 22:55:52.018125-04:00,Social Good Fund,461323531,Wildlife Conservation Society,131740011,0,0,0,0,0.0,2019,7,10,22,0,0,1,0,0,2018,12,0.92,0,0
4,2448,1629,20.0,2019-06-10 22:55:52.357373-04:00,Social Good Fund,461323531,Wildlife Conservation Society,131740011,0,0,0,0,0.0,2019,6,10,22,0,0,1,0,0,2018,12,0.92,0,0
