<a href="https://colab.research.google.com/github/has-c/UserMobilityMining/blob/master/Gowalla%20ANN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [7]:
import numpy as np
import pandas as pd
import keras
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers import Dense, Embedding, Softmax
from sklearn.metrics import accuracy_score
import datetime

#disable warnings
import warnings
warnings.filterwarnings("ignore")

## Helper Functions

In [56]:
def alignPastCurrentPOI(checkins, dateList):

    #group by userId, then checkin date and then order by time 
    uniqueUserIds = np.unique(checkins['userid'])
    uniqueCheckinDates = sorted(np.unique(dateList))

    sortedCheckins = pd.DataFrame([])

    for userId in uniqueUserIds:
        for checkinDate in uniqueCheckinDates:
            
            #group by userId and checkin date 
            groupedByUserId = checkins[userId==checkins['userid']]
            groupedByCheckinDate = groupedByUserId[groupedByUserId['Date'] == checkinDate]
            if groupedByCheckinDate.shape[0] == 0:
                continue
                
            #sort by time 
            groupedByCheckinDate.sort_values(by ='Time',ascending=True, inplace=True)
            groupedByCheckinDate.index = range(0, groupedByCheckinDate.shape[0])
            
            #create list of sorted checkins 
            sortedCheckins = pd.concat([sortedCheckins, groupedByCheckinDate], axis=0, ignore_index=True)
    
    #align past and current checkins - features and labels entire df
    pastCurrentCheckins = pd.DataFrame([])

    for userId in uniqueUserIds:
        
        #group by userId
        groupedByUserId = sortedCheckins[userId==checkins['userid']]
        
        for rowIndex in groupedByUserId.index:
            try:
                pastPOI = groupedByUserId.loc[rowIndex]
                currentPOI = groupedByUserId.loc[rowIndex+1]
                poiInstance = list(currentPOI) + list(pastPOI)
                pastCurrentCheckins = pd.concat([pastCurrentCheckins, pd.DataFrame(poiInstance).T], axis=0)
            except KeyError:
                break

    #rename fields 
    pastCurrentCheckinDfColumnNames = ['Current ' + name for name in sortedCheckins.columns] + ['Past ' + name for name in sortedCheckins.columns]
    pastCurrentCheckins.columns = pastCurrentCheckinDfColumnNames

    return pastCurrentCheckins

### Preprocessing 
<ul>
    <li> One hot encoding </li>
    <li> Standard scaling </li>
</ul>

In [41]:
#import data
checkinsDf = pd.read_csv("/home/usermobilitymining/Notebooks/volume/Hasnain/Processed Data/processedCheckinsSubset.csv")

In [42]:
#convert date and time to datetime variables
#extract day of the week and hour of checkin
checkinDate = [datetime.datetime.strptime(date, "%Y-%m-%d").date() for date in checkinsDf['Date']]
checkinDay = [date.weekday() for date in checkinDate]
checkinTime = [datetime.datetime.strptime(time, '%H:%M:%S').time() for time in checkinsDf['Time']]
checkinHour = [time.hour for time in checkinTime]

In [43]:
#one hot encode the time variable
ohe = OneHotEncoder(dtype=np.int8, n_values=24)
hourOfCheckinEncoded = ohe.fit_transform(np.array(checkinHour).reshape(-1,1))
timeColumns = ['Time ' + str(num) for num in np.arange(0,24)]
timeDf = pd.DataFrame(hourOfCheckinEncoded.toarray(), columns=timeColumns)

#one hot encode the day of the week variable
ohe = OneHotEncoder(dtype=np.int8, n_values=7)
dayOfTheWeekEncoded = ohe.fit_transform(np.array(checkinDay).reshape(-1,1))
dayColumns = ['DayOfWeek ' + str(num) for num in np.arange(0,7)]
dayDf = pd.DataFrame(dayOfTheWeekEncoded.toarray(), columns=dayColumns)

#one hot encode the main category and specific category 


In [50]:
#concat and drop some columns
checkinsDf = pd.concat([checkinsDf, timeDf], axis=1,copy=False) 
checkinsDf = pd.concat([checkinsDf, dayDf], axis=1,copy=False) 

In [54]:
checkinsDf.shape

(2000030, 47)

In [57]:
#align past and present instances
dateList = np.unique(checkinDate)
pastCurrentCheckins = alignPastCurrentPOI(checkinsDf, dateList)

KeyboardInterrupt: 

In [None]:
pastCurrentCheckins.shape

In [None]:
#Normalize the data
sc = StandardScaler()
X = sc.fit_transform(X)

In [None]:
#train test s