In [144]:
import implicit
from scipy.sparse import coo_matrix
from scipy.sparse import csr_matrix
import numpy as np
import pandas as pd

In [145]:
pwd

'/Users/dillonquan/Desktop/Capstone Project 2'

In [47]:
# Read the two raw exports from the working directory.
# members.csv is decoded as latin-1; groups.csv uses the pandas default encoding.
df_members = pd.read_csv('members.csv', encoding='latin-1')
df_groups = pd.read_csv('groups.csv')

In [48]:
# Discard the member columns that are not needed for the recommender.
df_members = df_members.drop(
    columns=[
        'bio', 'country', 'hometown', 'joined', 'lat', 'link', 'lon',
        'member_name', 'state', 'member_status', 'visited',
    ]
)

In [49]:
# Discard the group columns that are not needed for the recommender.
df_groups = df_groups.drop(
    columns=[
        'category_id', 'category.name', 'category.shortname', 'city_id',
        'country', 'created', 'description',
        'group_photo.base_url', 'group_photo.highres_link',
        'group_photo.photo_id', 'group_photo.photo_link',
        'group_photo.thumb_link', 'group_photo.type',
        'join_mode', 'lat', 'link', 'lon', 'members',
        'organizer.member_id', 'organizer.name',
        'organizer.photo.base_url', 'organizer.photo.highres_link',
        'organizer.photo.photo_id', 'organizer.photo.photo_link',
        'organizer.photo.thumb_link', 'organizer.photo.type',
        'rating', 'state', 'timezone', 'urlname', 'utc_offset',
        'visibility', 'who',
    ]
)

In [51]:
def get_city(city, members=None, groups=None):
    """Return the membership rows (members joined to groups) for one city.

    Parameters
    ----------
    city : str
        City acronym: 'SF', 'NYC' or 'CHI'.
    members : pandas.DataFrame, optional
        Member rows with at least ``city`` and ``group_id`` columns.
        Defaults to the notebook-level ``df_members``.
    groups : pandas.DataFrame, optional
        Group rows with at least ``city`` and ``group_id`` columns.
        Defaults to the notebook-level ``df_groups``.

    Returns
    -------
    pandas.DataFrame
        Members whose ``city`` is one of the spellings listed for the
        requested acronym, merged on ``group_id`` with the groups whose
        ``city`` is in the same list (pandas' default merge behaviour).

    Raises
    ------
    ValueError
        If ``city`` is not one of the supported acronyms.
    """
    # City-name spellings that are treated as belonging to each acronym.
    city_names = {
        'SF': ['San Francisco', 'san francisco', 'South San Francisco'],
        'NYC': ['New York', 'West New York', 'New York Mills'],
        'CHI': ['Chicago', 'Chicago Heights', 'West Chicago', 'Chicago Ridge',
                'East Chicago', 'North Chicago', 'Chicago Park'],
    }

    #defense coding
    # Membership is tested against a tuple so an unhashable argument still
    # ends up as ValueError instead of a TypeError from the dict lookup.
    if city not in tuple(city_names):
        raise ValueError('Invalid City')

    # Fall back to the notebook globals only when no frames are supplied.
    if members is None:
        members = df_members
    if groups is None:
        groups = df_groups

    # Filter only the requested city; the other cities are never computed.
    names = city_names[city]
    city_members = members[members.city.isin(names)]
    city_groups = groups[groups.city.isin(names)]

    return city_members.merge(city_groups, on='group_id')

In [52]:
# Build the working frame for San Francisco: the SF member rows merged with
# the SF group rows on group_id (see get_city).
df = get_city('SF')

In [64]:
df.shape

(1472925, 5)

In [65]:
# Dropping missing group_id values leaves the row count unchanged (compare with
# df.shape), so every row of the merged frame has a group_id.
# NOTE(review): get_city builds df with DataFrame.merge's default join, so
# confirm that members without a matching group were not already dropped before
# treating this as proof that every SF member is in a group.
df['group_id'].dropna().shape

(1472925,)

In [117]:
# Encode member_id and group_id as integer category codes. Each unique id gets
# one code, which is used as a matrix index and can be mapped back to the
# member / group name through this frame.
df['m_code'] = df['member_id'].astype('category').cat.codes
df['g_code'] = df['group_id'].astype('category').cat.codes

# The sparse matrices read df['dummy'] as the interaction value, but no visible
# cell creates that column, so a fresh Restart & Run All would fail with a
# KeyError. Create it only when absent so an existing column is left untouched.
# Assumes each row is one membership with weight 1 (as in the df.head() output)
# -- TODO confirm.
if 'dummy' not in df.columns:
    df['dummy'] = 1

In [146]:
df.head()

Unnamed: 0,member_id,city_x,group_id,city_y,group_name,dummy,m_code,g_code
0,65,San Francisco,2701562,San Francisco,GoSF,1,0,583
1,29009,San Francisco,2701562,San Francisco,GoSF,1,38,583
2,67424,San Francisco,2701562,San Francisco,GoSF,1,58,583
3,94707,San Francisco,2701562,San Francisco,GoSF,1,83,583
4,95826,San Francisco,2701562,San Francisco,GoSF,1,84,583


In [149]:
#create two sparse CSR matrices from the same interaction values (df['dummy']):
#  item_user has group codes (g_code) as rows and member codes (m_code) as columns,
#  user_item holds the same data with rows and columns swapped
item_user = csr_matrix((df['dummy'], (df['g_code'],df['m_code']))) #required to fit the model
user_item = csr_matrix((df['dummy'], (df['m_code'], df['g_code']))) #used as an input for recommend method

### Implicit Library First Trial

In [147]:
#initializing the ALS model with 20 latent factors, regularization 0.01 and 20 iterations
model = implicit.als.AlternatingLeastSquares(factors = 20, regularization = .01, iterations = 20)

#preset alpha: the factor the interaction matrix is scaled by to form confidence values
alpha = 15

#confidence c is defined as 1 + alpha*item_user, where item_user holds the item/user interactions
#Note (per the original author -- confirm against the implicit docs): the model adds the 1 itself,
#so only alpha*item_user is passed in and pairs with no interaction get a confidence of 1 when fitting
c_item_user = item_user * alpha

#fitting the model on the item x user matrix after it has been multiplied by alpha
model.fit(c_item_user)



In [176]:
#retrieving the top 20 recommended groups for one user.
#The first argument, 84, is an m_code (a row of user_item), not a raw member_id;
#in the df.head() output it is the m_code of member_id 95826.
recommend = model.recommend(84, user_item, N = 20)

In [177]:
#split the (item_id, score) pairs in recommend into two parallel lists
idx = [item_id for item_id, _ in recommend]
scores = [score for _, score in recommend]

In [178]:
idx

[475,
 293,
 119,
 574,
 339,
 107,
 241,
 145,
 539,
 187,
 1311,
 1003,
 10,
 448,
 954,
 21,
 937,
 1193,
 927,
 1338]

In [179]:
#Showing the recommended group names, in the same ranked order as idx / scores.
#Filtering df with isin(...).unique() returns names in the order they happen to
#appear in df, so the names would not line up with their scores. Look each
#g_code up in a code -> name mapping instead.
group_name_by_code = df.drop_duplicates('g_code').set_index('g_code')['group_name']
rec = [group_name_by_code.loc[code] for code in idx]
rec

['The SF JavaScript Meetup',
 'San Francisco Python Meetup Group',
 'Docker San Francisco',
 'The San Francisco Ruby Meetup Group',
 'SF New Tech',
 'The San Francisco Android GDG',
 'SFHTML5',
 'Google Developer Group San Francisco @sf_gdg',
 'Swift Language User Group (San Francisco)',
 'SF Virtual Reality',
 'Upload SF - VR & AR Meetup Group',
 'The San Francisco Java User Group',
 'iOS Developers (SF / SV)',
 'Bay Area Software Engineers (BASE)',
 'San Francisco Ruby on Rails Group',
 'Unity (official) - San Francisco',
 '20 and 30 somethings in San Francisco - AWESOME Stuff',
 'OpenLate - SF Tech Talks and Hack Nights',
 'Bay Area Hackathons',
 '20-somethings friends & fun in SF']

In [180]:
#sanity check: print any recommended group the user already belongs to
#(no output means none of the recommendations is an existing membership).
#`x in series` tests the Series' index labels, not its values, so the group
#names are collected into a set before testing membership.
joined_groups = set(df.loc[df['member_id'] == 95826, 'group_name'])
for i in rec:
    if i in joined_groups:
        print(i)

In [181]:
scores

[1.0058045,
 0.93341517,
 0.85864979,
 0.81541562,
 0.81189871,
 0.79300177,
 0.77500331,
 0.77035034,
 0.76005113,
 0.74547756,
 0.7441256,
 0.72162366,
 0.71861124,
 0.71826637,
 0.71266431,
 0.71151167,
 0.68971443,
 0.68463618,
 0.68380594,
 0.6748932]

In [182]:
#names of the groups that member_id 95826 already belongs to
df.loc[df['member_id'] == 95826, 'group_name']

4                                               GoSF
67974                    Hack Reactor: Learn to Code
99279                San Francisco TechCrunch Meetup
162465          Tech in Motion Events: San Francisco
210593       SF Mobile App Developers iPhone Android
214051                             Game and App Devs
277255               The Bay Area Clojure User Group
282500          Hacks/Hackers San Francisco Bay Area
349490    San Franciso Game Development Meetup Group
385409                  T.O.J. - Tales Of JavaScript
435054                         Doximity's Tech Night
448462                                      SF Scala
496965                      irish music trad session
511389                                     Tinderbox
597361    The Red Lantern: Bay Area Asian Cinephiles
598043                   Startup Grind San Francisco
600314                       SF Clojurescript Meetup
600478        SF Bay Area Game Jamming & Game Design
600916                          Game Dev Study