In [25]:
import pandas as pd
from zipfile import ZipFile
from io import BytesIO
import requests

# URL of the MovieLens "latest small" ZIP archive
url = 'https://files.grouplens.org/datasets/movielens/ml-latest-small.zip'

# Download the archive. The timeout keeps the script from hanging
# indefinitely on a stalled connection, and raise_for_status() makes an HTTP
# error (404, 500, ...) fail right here instead of surfacing later as a
# confusing BadZipFile when the error page is parsed as a ZIP.
response = requests.get(url, timeout=30)
response.raise_for_status()

# Open the archive straight from the downloaded bytes held in memory
with ZipFile(BytesIO(response.content)) as z:
    # List every member of the archive to confirm the expected layout
    print(z.namelist())

    # Read ratings.csv from the ZIP archive into a DataFrame
    with z.open('ml-latest-small/ratings.csv') as f:
        df = pd.read_csv(f)

# Display the first few rows of the DataFrame
print(df.head())

# Install necessary libraries (uncomment the line below if running in an environment where libraries need to be installed)
# !pip install numpy pandas scikit-surprise

# Import libraries
import numpy as np
from surprise import Dataset, Reader, SVD
from surprise.model_selection import cross_validate

# Tell surprise the rating scale (0.5 to 5.0) used when parsing the DataFrame
reader = Reader(rating_scale=(0.5, 5.0))

# Build the surprise dataset from the user, item and rating columns (in that order)
data = Dataset.load_from_df(
    df=df[['userId', 'movieId', 'rating']],
    reader=reader,
)

# SVD recommender from surprise, constructed with its default arguments
algo = SVD()

# 5-fold cross-validation measuring RMSE and MAE; verbose=True prints the results
results = cross_validate(
    algo=algo,
    data=data,
    measures=['RMSE', 'MAE'],
    cv=5,
    verbose=True,
)


['ml-latest-small/', 'ml-latest-small/links.csv', 'ml-latest-small/tags.csv', 'ml-latest-small/ratings.csv', 'ml-latest-small/README.txt', 'ml-latest-small/movies.csv']
   userId  movieId  rating  timestamp
0       1        1     4.0  964982703
1       1        3     4.0  964981247
2       1        6     4.0  964982224
3       1       47     5.0  964983815
4       1       50     5.0  964982931
Evaluating RMSE, MAE of algorithm SVD on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.8753  0.8711  0.8758  0.8877  0.8689  0.8758  0.0065  
MAE (testset)     0.6688  0.6688  0.6735  0.6828  0.6685  0.6725  0.0055  
Fit time          1.22    1.26    1.23    1.26    1.32    1.26    0.03    
Test time         0.14    0.31    0.13    0.18    0.10    0.17    0.08    
