In [41]:
from distutils import core
import pandas as pd 
import numpy as np 
import ivy
from typing import List, Dict, Union, Optional

In [42]:
# sample ivy test
class MyModel(ivy.Module):
    def __init__(self):
        self.linear0 = ivy.Linear(3, 64)
        self.linear1 = ivy.Linear(64, 1)
        ivy.Module.__init__(self)

    def _forward(self, x):
        x = ivy.relu(self.linear0(x))
        return ivy.sigmoid(self.linear1(x))

ivy.set_framework('torch')  # change to any preffered framework!
model = MyModel()
optimizer = ivy.Adam(1e-4)
x_in = ivy.array([1., 2., 3.])
target = ivy.array([0.])

def loss_fn(v):
    out = model(x_in, v=v)
    return ivy.reduce_mean((out - target)**2)

for step in range(100):
    loss, grads = ivy.execute_with_gradients(loss_fn, model.v)
    model.v = optimizer.step(model.v, grads)
    print('step {} loss {}'.format(step, ivy.to_numpy(loss).item()))

print('Finished training!')

step 0 loss 0.35689836740493774
step 1 loss 0.35647013783454895
step 2 loss 0.3561514914035797
step 3 loss 0.355878084897995
step 4 loss 0.3556293547153473
step 5 loss 0.3553960919380188
step 6 loss 0.3551727533340454
step 7 loss 0.3549562990665436
step 8 loss 0.35474440455436707
step 9 loss 0.35453563928604126
step 10 loss 0.3543287515640259
step 11 loss 0.35412293672561646
step 12 loss 0.35391736030578613
step 13 loss 0.35371172428131104
step 14 loss 0.35350552201271057
step 15 loss 0.3532983660697937
step 16 loss 0.3530898690223694
step 17 loss 0.35287973284721375
step 18 loss 0.3526681065559387
step 19 loss 0.35245442390441895
step 20 loss 0.3522387444972992
step 21 loss 0.35202091932296753
step 22 loss 0.3518007695674896
step 23 loss 0.3515782952308655
step 24 loss 0.3513534367084503
step 25 loss 0.3511258661746979
step 26 loss 0.3508957624435425
step 27 loss 0.35066312551498413
step 28 loss 0.3504277467727661
step 29 loss 0.35018959641456604
step 30 loss 0.34994882345199585
step 

In [43]:
# downloading dataset from kaggle 

import io
import os
from urllib.request import urlopen
from urllib.error import URLError
import zipfile

def get_data(url : str , download_path):
    data_name : str = url.split("/")[-1]
    folder_name : str = os.path.splitext(data_name)

    try: 
        req =urlopen(url)
    except URLError as e:
        print(f'Cannot dowload data. Error {e.reason}')
        return 

    assert req.status == 200
    data = req.read()

    with zipfile.ZipFile(io.BytesIO(data)) as archive:
        archive.extractall(download_path)

    print(f'The data has been extracted into this folder {download_path}')




In [68]:
# reading the data from path 

def read_data(path):
    files : dict = {}
    for filename in path.glob('*'):
        if filename.suffix == '.csv':
            files[filename.stem] = pd.read_csv(filename)
        elif filename.suffix == '.dat':
            if filename.stem == 'ratings':
                columns = ['userId', 'movieId', 'rating', 'timestamp']
            else:
                columns = ['movieId', 'title', 'genres']
            data = pd.read_csv(filename, sep='::', names=columns, engine='python', encoding='latin-1')
            files[filename.stem] = data
    return files['ratings'], files['movies']

In [53]:
# dataset kaggle
from pathlib import Path


archive_url = f'http://files.grouplens.org/datasets/movielens/ml-1m.zip'
download_path = Path('.') / '../data' / 'movielens'

In [67]:
get_data(archive_url,download_path)
type(download_path)

pathlib.PosixPath

In [69]:
ratings, movies = read_data( download_path /'ml-1m')

In [70]:
ratings.head(5)


Unnamed: 0,userId,movieId,rating,timestamp
0,1,1193,5,978300760
1,1,661,3,978302109
2,1,914,3,978301968
3,1,3408,4,978300275
4,1,2355,5,978824291


In [73]:
movies.head(20)


Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Animation|Children's|Comedy
1,2,Jumanji (1995),Adventure|Children's|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama
4,5,Father of the Bride Part II (1995),Comedy
5,6,Heat (1995),Action|Crime|Thriller
6,7,Sabrina (1995),Comedy|Romance
7,8,Tom and Huck (1995),Adventure|Children's
8,9,Sudden Death (1995),Action
9,10,GoldenEye (1995),Action|Adventure|Thriller


In [66]:
ratings.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000209 entries, 0 to 1000208
Data columns (total 4 columns):
 #   Column     Non-Null Count    Dtype
---  ------     --------------    -----
 0   userId     1000209 non-null  int64
 1   movieId    1000209 non-null  int64
 2   rating     1000209 non-null  int64
 3   timestamp  1000209 non-null  int64
dtypes: int64(4)
memory usage: 30.5 MB


In [72]:
ratings.describe()


Unnamed: 0,userId,movieId,rating,timestamp
count,1000209.0,1000209.0,1000209.0,1000209.0
mean,3024.512,1865.54,3.581564,972243700.0
std,1728.413,1096.041,1.117102,12152560.0
min,1.0,1.0,1.0,956703900.0
25%,1506.0,1030.0,3.0,965302600.0
50%,3070.0,1835.0,4.0,973018000.0
75%,4476.0,2770.0,4.0,975220900.0
max,6040.0,3952.0,5.0,1046455000.0


Data Visualization

In [80]:
# Tabular view of the data 

def tabular_view(ratings, n=15):
    """ creates a cross-tabular view of users vs movies """

    user_groups = ratings.groupby('userId')['rating'].count()
    top_users = user_groups.sort_values(ascending=False)[:15]

    movie_groups = ratings.groupby('movieId')['rating'].count()
    top_movies = movie_groups.sort_values(ascending=False)[:15]

    top = (
        ratings.join(top_users,rsuffix='_r', how='inner', on='userId' ).
        join(top_movies, rsuffix='_r', how='inner', on='movieId')
    )

    return pd.crosstab(top.userId, top.movieId, top.rating, aggfunc=np.sum)

In [81]:
tabular_view(ratings,movies)

  top_users = user_groups.sort_values(ascending=False)[:15]
  top_movies = movie_groups.sort_values(ascending=False)[:15]


movieId,110,260,480,589,593,608,1196,1198,1210,1270,1580,2028,2571,2762,2858
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
889,4.0,4.0,3.0,5.0,5.0,4.0,4.0,,3.0,4.0,3.0,3.0,5.0,,2.0
1015,4.0,5.0,4.0,5.0,5.0,5.0,4.0,5.0,4.0,4.0,4.0,5.0,5.0,5.0,4.0
1150,2.0,5.0,,2.0,3.0,5.0,4.0,2.0,3.0,2.0,2.0,2.0,1.0,2.0,4.0
1181,3.0,4.0,2.0,5.0,3.0,3.0,4.0,3.0,3.0,3.0,4.0,4.0,5.0,4.0,3.0
1449,3.0,3.0,2.0,2.0,5.0,5.0,3.0,4.0,2.0,2.0,4.0,3.0,4.0,4.0,4.0
1680,1.0,2.0,5.0,5.0,5.0,5.0,5.0,5.0,3.0,3.0,4.0,5.0,3.0,5.0,5.0
1941,5.0,5.0,5.0,3.0,5.0,4.0,5.0,5.0,5.0,5.0,5.0,5.0,3.0,5.0,1.0
1980,4.0,4.0,4.0,4.0,5.0,5.0,4.0,5.0,4.0,5.0,4.0,5.0,5.0,5.0,5.0
2063,5.0,4.0,4.0,2.0,5.0,2.0,4.0,4.0,4.0,4.0,3.0,2.0,5.0,4.0,5.0
2909,5.0,5.0,5.0,4.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0,4.0,5.0,5.0
