In [1]:
import numpy as np
import pandas as pd
import math
import json
from SPARQLWrapper import SPARQLWrapper, JSON
import requests

# Every input file separates fields with a comma followed by a space. A
# multi-character separator is only supported by the pure-Python parser engine.
csvReadOptions = {'sep': ', ', 'engine': 'python'}

# Ratings table plus the two context tables that are queried alongside it.
data = pd.read_csv('RS/data.csv', **csvReadOptions)
placeData = pd.read_csv('RS/context_place.csv', **csvReadOptions)
dayData = pd.read_csv('RS/context_day.csv', **csvReadOptions)

# The movie-name file is read without a header row, so column names are supplied here.
headerList = ["movieNum", "movieName"]
movieNameData = pd.read_csv('RS/movie_names.csv', names=headerList, **csvReadOptions)

In [7]:
# Sentinel used in the ratings table for "no rating given"; it is handed to the
# functions below as their `notRatedMarker` argument.
notRated = -1
def queryForRecommendations(movieNameData, keys):
    """Print the actresses of each listed movie together with their birthplaces.

    For every key the movie title is looked up in ``movieNameData``, resolved
    to a Wikidata entity through the ``wbsearchentities`` API (first hit wins),
    and the Wikidata SPARQL endpoint is asked for the movie's cast members
    (``P161``) whose ``P21`` is ``Q6581072`` and whose place of birth (``P19``)
    has coordinates (``P625``).

    Parameters
    ----------
    movieNameData : pandas.DataFrame
        Table with a ``movieNum`` column (compared against each key) and a
        ``movieName`` column (the title sent to the search API).
    keys : iterable
        Movie identifiers to look up.

    Returns
    -------
    None
        Everything is printed. Movies without a title, without a Wikidata
        match or without matching cast members are reported and skipped
        instead of aborting the whole run.

    Raises
    ------
    requests.HTTPError
        If the Wikidata search API answers with an error status.
    """
    API_ENDPOINT = "https://www.wikidata.org/w/api.php"

    # One SPARQL client is enough for all movies.
    sparql = SPARQLWrapper("https://query.wikidata.org/sparql")
    sparql.setReturnFormat(JSON)

    for key in keys:
        titles = movieNameData.loc[movieNameData['movieNum'] == key, 'movieName']
        if titles.empty:
            print('\nNo movie name found for ' + str(key))
            continue
        # Use the raw cell value: serialising through to_csv() appended a line
        # break to the search term and could wrap titles in CSV quoting.
        movie_name = str(titles.iloc[0]).strip()

        params = {
            'action' : 'wbsearchentities',
            'format' : 'json',
            'language' : 'en',
            'search': movie_name
        }
        # A timeout keeps an unresponsive server from hanging the notebook.
        res = requests.get(API_ENDPOINT, params=params, timeout=30)
        res.raise_for_status()
        matches = res.json().get('search', [])
        if not matches:
            print('\nNo Wikidata entity found for ' + movie_name)
            continue
        movieShortCut = matches[0]['id']

        sparql_query = """
        #defaultView:Map
        SELECT ?actor ?coords ?actorLabel ?placeLabel
        WHERE {
          wd:%s wdt:P161 ?actor .
          ?actor wdt:P21 wd:Q6581072 .
          ?actor wdt:P19 ?place .
          ?place wdt:P625 ?coords .
          SERVICE wikibase:label {bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en".}
              }

        """ % (movieShortCut)

        sparql.setQuery(sparql_query)
        results = sparql.query().convert()
        bindings = results['results']['bindings']

        # Title framed by blank lines, matching the layout of earlier runs.
        print('\n' + movie_name + '\n')
        if not bindings:
            # json_normalize of an empty list has no label columns to select.
            print('No matching cast members found')
            continue

        results_df = pd.json_normalize(bindings)
        output = results_df[['actorLabel.value', 'placeLabel.value']].rename(
            columns={
                'actorLabel.value': 'Actress name',
                'placeLabel.value': 'Place of birth',
            }
        )
        print(output)
        
        
def findSimilaritiesAndMean(userIndex, data, notRatedMarker):
    """Rank every other user by cosine similarity to the user at ``userIndex``.

    The similarity of two users is the cosine of their rating vectors
    restricted to the movies both of them rated. Alongside it, each user's
    mean over all of their own rated movies is reported.

    Parameters
    ----------
    userIndex : int
        Row position of the reference user in ``data``.
    data : pandas.DataFrame
        One row per user. The first column is skipped; every other column is
        treated as a numeric rating column.
    notRatedMarker : number
        Value that marks a movie as not rated.

    Returns
    -------
    numpy.ndarray
        Structured array with fields ``user`` (row position), ``mean`` and
        ``sim`` (both rounded to 3 decimals), ordered by ``sim`` descending.
        The reference user itself is not included. A pair of users without a
        usable overlap (no common movie, or an all-zero vector) gets
        ``sim = 0.0`` instead of raising ``ZeroDivisionError``.
    """
    # Work on plain positional arrays so the result does not depend on the
    # DataFrame's index labels.
    ratings = data.iloc[:, 1:].to_numpy(dtype=float)
    target = ratings[userIndex]
    targetRated = target != notRatedMarker

    sims = []
    for i in range(ratings.shape[0]):
        if i == userIndex:
            continue
        other = ratings[i]
        otherRated = other != notRatedMarker

        # Mean over everything this user rated; NaN when they rated nothing.
        if otherRated.any():
            userMean = float(np.round(other[otherRated].mean(), 3))
        else:
            userMean = float('nan')

        # Cosine similarity over the co-rated movies only.
        both = targetRated & otherRated
        theirs = other[both]
        mine = target[both]
        numerator = np.dot(theirs, mine)
        denominator = math.sqrt(np.dot(theirs, theirs)) * math.sqrt(np.dot(mine, mine))
        if denominator == 0:
            sim = 0.0
        else:
            sim = float(np.round(numerator / denominator, 3))

        sims.append((i, userMean, sim))

    dtype = [('user', int), ('mean', float), ('sim', float)]
    sims = np.array(sims, dtype=dtype)
    # Ascending sort on 'sim', then reversed to put the most similar user first.
    sims = np.sort(sims, order='sim')[::-1]
    return sims


def makeRecomendation(userIndex, data, notRatedMarker, neighbours=4):
    """Predict ratings for every movie the user at ``userIndex`` has not rated.

    For each unrated movie the most similar users that did rate it are
    collected (at most ``neighbours`` of them) and the prediction is

        userMean + sum(sim * (rate - neighbourMean)) / sum(|sim|)

    Parameters
    ----------
    userIndex : int
        Row position of the user in ``data``.
    data : pandas.DataFrame
        One row per user. The first column is skipped; the remaining columns
        hold ratings and their names become the keys of the result.
    notRatedMarker : number
        Value that marks a movie as not rated.
    neighbours : int, optional
        Maximum number of similar users taken into account per movie
        (default 4).

    Returns
    -------
    dict
        Maps each unrated movie's column name to the predicted rating, rounded
        to 3 decimals. When no usable neighbour exists for a movie (nobody
        rated it, or all similarities are zero) the user's own mean rating is
        returned instead of raising ``ZeroDivisionError``.
    """
    sims = findSimilaritiesAndMean(userIndex, data, notRatedMarker)

    # Select the rating columns first so the row keeps a numeric dtype.
    myRatings = data.iloc[:, 1:].iloc[userIndex]
    myUserMean = myRatings[myRatings != notRatedMarker].mean()
    unrated = myRatings.index[myRatings == notRatedMarker]

    rates = {}
    for movie in unrated:
        movieRatings = data[movie]
        used = 0
        weightedSum = 0.0
        absSum = 0.0
        # `sims` is ordered from most to least similar.
        for neighbour in sims:
            if used == neighbours:
                break
            # Neighbour ids are row positions, so look them up positionally.
            rate = movieRatings.iloc[int(neighbour['user'])]
            if rate == notRatedMarker:
                continue
            used += 1
            weightedSum += neighbour['sim'] * (rate - neighbour['mean'])
            absSum += abs(neighbour['sim'])

        offset = weightedSum / absSum if absSum > 0 else 0.0
        rates[movie] = float(np.round(myUserMean + offset, 3))
    return rates


# Predicted ratings for every movie the user at row index 2 has not rated yet.
result1 = makeRecomendation(userIndex=2, data=data, notRatedMarker=notRated)
print(result1)

{'Movie 7': 3.425, 'Movie 12': 2.383, 'Movie 20': 3.184, 'Movie 22': 2.682, 'Movie 29': 2.359}


In [3]:
# Look up the cast of every recommended movie. The recommendations are stored in
# `result1`; `result` is only created in a later cell (as a JSON string without
# .keys()), so referring to it here fails on a fresh top-to-bottom run.
queryForRecommendations(movieNameData, list(result1.keys()))


The Lord of the Rings: The Return of the King

     Actress name Place of birth
0  Cate Blanchett      Melbourne
1    Miranda Otto       Brisbane
2    Sarah McLeod       Putāruru
3       Liv Tyler    East Harlem

Forrest Gump

            Actress name            Place of birth
0       Marla Sucharetza             New York City
1     Mary Ellen Trainor             San Francisco
2        Elizabeth Hanks               Los Angeles
3        Hilary Chaplain                    Boston
4         Deborah McTeer            South Carolina
5          Hanna R. Hall                    Denver
6           Robin Wright                    Dallas
7      Jacqueline Lovell             Beverly Hills
8   Siobhan Fallon Hogan                  Syracuse
9            Emily Carey  London Borough of Barnet
10           Isabel Rose           Upper East Side
11           Sally Field                  Pasadena
12        Hallie D'Amore                    Harvey
13        Ione M. Telech                  Beaufort
14     

In [4]:
def weekendAdvice(userIndex, data, placeData, dayData, notRatedMarker, neighbours=4):
    """Pick the best-rated film that similar users watched at home on a weekend.

    The ``neighbours`` users most similar to the one at ``userIndex`` are
    taken. For each of them the matching rows of ``data``, ``placeData`` and
    ``dayData`` are stacked, and the films whose day entry is ``'Sat'`` or
    ``'Sun'`` and whose place entry is ``'h'`` are kept. Among all kept films
    the first one carrying the highest rating is returned.

    Parameters
    ----------
    userIndex : int
        Row position of the user the advice is for.
    data, placeData, dayData : pandas.DataFrame
        Rating, place and day tables; row ``k`` of each is read for user ``k``.
    notRatedMarker : number
        Value that marks a movie as not rated (forwarded to the similarity
        computation).
    neighbours : int, optional
        How many of the most similar users to consider (default 4). Fewer are
        used when the data does not contain that many other users.

    Returns
    -------
    object or None
        Column label of the selected film, or ``None`` when no film passes the
        weekend/home filter.
    """
    sims = findSimilaritiesAndMean(userIndex, data, notRatedMarker)

    candidates = []
    # Slicing copes with fewer than `neighbours` other users.
    for neighbour in sims[:neighbours]:
        row = int(neighbour['user'])
        # After ignore_index: row 0 = ratings, row 1 = place, row 2 = day.
        combined = pd.concat(
            [data.iloc[[row]], placeData.iloc[[row]], dayData.iloc[[row]]],
            ignore_index=True,
        )
        # The first and last columns are discarded before transposing.
        # NOTE(review): presumably both are non-film columns - confirm against the CSV layout.
        combined = combined.drop(
            columns=[combined.columns[0], combined.columns[-1]]
        ).transpose()
        onWeekend = combined[2].isin(['Sun', 'Sat'])
        atHome = combined[1] == 'h'
        kept = combined[onWeekend & atHome]
        if not kept.empty:
            candidates.append(kept)

    if not candidates:
        return None

    # Concatenate once instead of growing a DataFrame inside the loop.
    weekendHomeFilms = pd.concat(candidates, ignore_index=False)
    bestRating = weekendHomeFilms[0].max()
    return weekendHomeFilms[weekendHomeFilms[0] == bestRating].index[0]

# Weekend-at-home film suggestion for the user at row index 2.
res2 = weekendAdvice(
    userIndex=2,
    data=data,
    placeData=placeData,
    dayData=dayData,
    notRatedMarker=notRated,
)

In [8]:
# Each task's answer serialised on its own.
task1 = json.dumps(result1, indent=4, sort_keys=True)
task2 = json.dumps(res2, indent=4, sort_keys=True)

# NOTE(review): the calls above use row index 2; presumably 3 is the same user
# expressed as a 1-based id - confirm.
user = 3

# Combined report: predicted ratings under "1", the weekend suggestion under "2".
result = json.dumps(
    {
        'user': user,
        '1': result1,
        '2': {'Movie_name': res2},
    },
    indent=4,
)
print(result)

{
    "user": 3,
    "1": {
        "Movie 7": 3.425,
        "Movie 12": 2.383,
        "Movie 20": 3.184,
        "Movie 22": 2.682,
        "Movie 29": 2.359
    },
    "2": {
        "Movie_name": "Movie 1"
    }
}
