In [1]:
import pandas as pd
import numpy as np
import polars as pl
import datetime
import matplotlib.pyplot as plt
import seaborn as sns
import altair as alt
import gzip
import shutil
import pathlib
import os
import sqlalchemy
import sqlite3
import spacy
import spacy_cleaner
from spacy_cleaner import processing, Cleaner
import re
import tqdm
from tqdm.notebook import tqdm, trange
import ipywidgets as widgets
from ipywidgets import IntProgress, HTML, VBox
from IPython.display import display
import time
import timeit
import sklearn

from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score
from sklearn.ensemble import RandomForestRegressor
import cupy as cp
import xgboost
from xgboost import XGBRegressor
from sklearn.metrics import mean_squared_error

from transformers import BertTokenizer, BertForSequenceClassification
import torch
from torch.nn.functional import softmax

from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
sid_obj = SentimentIntensityAnalyzer()

%matplotlib inline
alt.data_transformers.disable_max_rows()


#spacy.prefer_gpu()
spacy.require_gpu(gpu_id=0)
nlp = spacy.load("en_core_web_sm")

# This loads a larger and more robust model. Use with caution though because it takes considerably longer to run
#nlp = spacy.load("en_core_web_trf")

#nlp = spacy.load('/path/to/en_core_web_sm')

--------------------------------------------------------------------------------

  CuPy may not function correctly because multiple CuPy packages are installed
  in your environment:

    cupy-cuda11x, cupy-cuda12x

  Follow these steps to resolve this issue:

    1. For all packages listed above, run the following command to remove all
       existing CuPy installations:

         $ pip uninstall <package_name>

      If you previously installed CuPy via conda, also run the following:

         $ conda uninstall cupy

    2. Install the appropriate CuPy package.
       Refer to the Installation Guide for detailed instructions.

         https://docs.cupy.dev/en/stable/install.html

--------------------------------------------------------------------------------



In [2]:
def print_files_in_directory(directory_path):
    with os.scandir(directory_path) as entries:
        for entry in entries:
            if entry.is_file():
                print(entry.name)

def get_sentiment_score(text):
    sentiment_scores = sid_obj.polarity_scores(text)
    return sentiment_scores

def clean_text(text):
    doc = nlp(text)

    cleaned_tokens = [token.lemma_.lower().strip() for token in doc if not token.is_punct and not token.is_space]
    cleaned_tokens = [token for token in cleaned_tokens if not nlp.vocab[token].is_stop]
    cleaned_text = ' '.join(cleaned_tokens)

    return cleaned_text

def remove_numbers(text):
    cleaner = Cleaner(
        nlp,
        processing.remove_number_token
    )
    return cleaner.clean(text)

def remove_numbers_regex(text):
    # Pattern to remove numbers from text data
    pattern = r"\d+"

    return re.sub(pattern,"",text)

In [3]:
conn = sqlite3.connect(r"C:/Users/asl4a/AirBnB_Data.db")
cursor = conn.cursor()

In [4]:
def read_data_in_batches(file_path,read_batch_size,file_type):
    if file_type == 'csv':
        reader = pd.read_csv(file_path,chunksize=read_batch_size)
        for batch_df in reader:
            yield batch_df
    if file_type == 'parquet':
        pd.read_parquet(file_path,chunksize=read_batch_size)
        for batch_df in reader:
            yield batch_df
    else:
        "No file type specified. Please specify either csv or parquet"

In [15]:
parquet_file_path = r"F:\Data Science\Datasets\AirBnB Reviews Regression Inputs\combined_processed_reviews.parquet"
csv_file_path = r"F:\Data Science\Datasets\AirBnB Reviews Regression Inputs\combined_processed_reviews.csv"

file_type = 'csv'
if file_type == 'parquet':
    file_path = r"F:\Data Science\Datasets\AirBnB Reviews Regression Inputs\combined_processed_reviews.parquet"
if file_type == 'csv':
    file_path = r"F:\Data Science\Datasets\AirBnB Reviews Regression Inputs\combined_processed_reviews.csv"

read_batch_size = 1000
batch_size = 500

xgb_model = XGBRegressor(device='cuda',tree_method='hist',eval_metric='rmse',early_stopping_rounds=10)

tfidf_vectorizer = TfidfVectorizer(analyzer='word',ngram_range=(1,4))


for b, batch_df in enumerate(read_data_in_batches(file_path=file_path, read_batch_size=read_batch_size, file_type=file_type)):
    X = batch_df['cleaned_text']
    y = batch_df['review_scores_rating']

    X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.2,random_state=42)
    X_train, X_val, y_train, y_val = train_test_split(X_train,y_train,test_size=0.2,random_state=42)

    X_train_tfidf = tfidf_vectorizer.fit_transform(X_train)
    X_test_tfidf = tfidf_vectorizer.transform(X_test)
    X_val_tfidf = tfidf_vectorizer.transform(X_val)

    X_test_gpu = cp.array(X_test_tfidf.toarray())
    X_val_gpu = cp.array(X_val_tfidf.toarray())
    y_test_gpu = cp.array(y_test)
    y_val_gpu = cp.array(y_val)


    num_batches = int(np.ceil(X_train_tfidf.shape[0] / batch_size))

    start_idx = 0
    end_idx = min(batch_size, X_train_tfidf.shape[0])

    X_train_batch_gpu = cp.array(X_train_tfidf[start_idx:end_idx].toarray())
    y_train_batch_gpu = cp.array(y_train[start_idx:end_idx])

    if b == 0:
        xgb_model.fit(X_train_batch_gpu, y_train_batch_gpu,eval_set=[(X_val_gpu,y_val_gpu)],verbose=True)
    else:
        pass

    for i in range(1,num_batches):
        print("Beginning to iterate over batches")
        start_idx = i * batch_size
        end_idx = min((i+1) * batch_size, X_train_tfidf.shape[0])

        X_train_batch_gpu = cp.array(X_train_tfidf[start_idx:end_idx].toarray())
        y_train_batch_gpu = cp.array(y_train[start_idx:end_idx])

        xgb_model.fit(X_train_batch_gpu,y_train_batch_gpu,xgb_model.get_booster().best_iteration,eval_set=[(X_val_gpu,y_val_gpu)],verbose=True)

        y_pred_gpu = xgb_model.predict(X_test_gpu)

        rmse = np.sqrt(mean_squared_error(y_test_gpu.get(), y_pred_gpu))
        print(f"Batch {i+1}/{num_batches} - RMSE: {rmse}")

    print('MODEL EVAL FOR THIS BATCH')
    y_pred_gpu = xgb_model.predict(X_test_gpu)
    final_rmse = np.sqrt(mean_squared_error(y_test_gpu.get(), y_pred_gpu))
    print(final_rmse)

if file_type == 'csv':
    df_revs = pd.read_csv(file_path)
if file_type == 'parquet':
    df_revs = pd.read_parquet(file_path)

X = df_revs['cleaned_text']
y = df_revs['review_scores_rating']

X_test_tfidf = tfidf_vectorizer.transform(X)
X_test_gpu = cp.array(X_test_tfidf.toarray())
y_test_gpu = cp.array(y)

y_pred_gpu = xgb_model.predict(X_test_gpu)
final_rmse = np.sqrt(mean_squared_error(y_test_gpu.get(), y_pred_gpu))
print(f"Final RMSE: {final_rmse}")

[0]	validation_0-rmse:0.00382
[1]	validation_0-rmse:0.00347
[2]	validation_0-rmse:0.00317
[3]	validation_0-rmse:0.00290
[4]	validation_0-rmse:0.00278
[5]	validation_0-rmse:0.00272
[6]	validation_0-rmse:0.00273
[7]	validation_0-rmse:0.00275
[8]	validation_0-rmse:0.00275
[9]	validation_0-rmse:0.00278
[10]	validation_0-rmse:0.00275
[11]	validation_0-rmse:0.00277
[12]	validation_0-rmse:0.00279
[13]	validation_0-rmse:0.00281
[14]	validation_0-rmse:0.00283
[15]	validation_0-rmse:0.00285
Beginning to iterate over batches




TypeError: ('Unsupported type for weight', "<class 'int'>")

CPU Version

In [16]:
parquet_file_path = r"F:\Data Science\Datasets\AirBnB Reviews Regression Inputs\combined_processed_reviews.parquet"
csv_file_path = r"F:\Data Science\Datasets\AirBnB Reviews Regression Inputs\combined_processed_reviews.csv"

file_type = 'csv'
if file_type == 'parquet':
    file_path = r"F:\Data Science\Datasets\AirBnB Reviews Regression Inputs\combined_processed_reviews.parquet"
if file_type == 'csv':
    file_path = r"F:\Data Science\Datasets\AirBnB Reviews Regression Inputs\combined_processed_reviews.csv"

read_batch_size = 10000
batch_size = 1000

xgb_model = XGBRegressor(eval_metric='rmse',early_stopping_rounds=25)

tfidf_vectorizer = TfidfVectorizer(analyzer='word',ngram_range=(1,4))


for b, batch_df in enumerate(read_data_in_batches(file_path=file_path, read_batch_size=read_batch_size, file_type=file_type)):
    batch_df['cleaned_text'] = batch_df['cleaned_text'].astype(str)
    batch_df = batch_df.dropna(how='any',axis=0)
    X = batch_df['cleaned_text']
    y = batch_df['review_scores_rating']

    X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.2,random_state=42)
    X_train, X_val, y_train, y_val = train_test_split(X_train,y_train,test_size=0.2,random_state=42)

    X_train_tfidf = tfidf_vectorizer.fit_transform(X_train)
    X_test_tfidf = tfidf_vectorizer.transform(X_test)
    X_val_tfidf = tfidf_vectorizer.transform(X_val)



    num_batches = int(np.ceil(X_train_tfidf.shape[0] / batch_size))

    start_idx = 0
    end_idx = min(batch_size, X_train_tfidf.shape[0])

    X_train_batch = X_train_tfidf[start_idx:end_idx]
    y_train_batch = y_train[start_idx:end_idx]

    if b == 0:
        xgb_model.fit(X_train_batch, y_train_batch,eval_set=[(X_val_tfidf,y_val)],verbose=True)
    else:
        pass

    for i in range(1,num_batches):
        print("Beginning to iterate over batches")
        start_idx = i * batch_size
        end_idx = min((i+1) * batch_size, X_train_tfidf.shape[0])

        X_train_batch = X_train_tfidf[start_idx:end_idx]
        y_train_batch = y_train[start_idx:end_idx]

        xgb_model.fit(X_train_batch,y_train_batch,xgb_model.get_booster().best_iteration,eval_set=[(X_val_tfidf,y_val)],verbose=True)

        y_pred = xgb_model.predict(X_test)

        rmse = np.sqrt(mean_squared_error(y_test, y_pred))
        print(f"Batch {i+1}/{num_batches} - RMSE: {rmse}")

    print('MODEL EVAL FOR THIS BATCH')
    y_pred = xgb_model.predict(X_test)
    final_rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    print(final_rmse)

if file_type == 'csv':
    df_revs = pd.read_csv(file_path)
if file_type == 'parquet':
    df_revs = pd.read_parquet(file_path)

df_revs['cleaned_text'] = df_revs['cleaned_text'].astype(str)
df_revs = df_revs.dropna(how='any',axis=0)

X = df_revs['cleaned_text']
y = df_revs['review_scores_rating']

X_test_tfidf = tfidf_vectorizer.transform(X)
X_test = X_test_tfidf
y_test = y

y_pred = xgb_model.predict(X_test)
final_rmse = np.sqrt(mean_squared_error(y_test, y_pred))
print(f"Final RMSE: {final_rmse}")

[0]	validation_0-rmse:0.08084
[1]	validation_0-rmse:0.07806
[2]	validation_0-rmse:0.07570
[3]	validation_0-rmse:0.07495
[4]	validation_0-rmse:0.07435
[5]	validation_0-rmse:0.07393
[6]	validation_0-rmse:0.07302
[7]	validation_0-rmse:0.07281
[8]	validation_0-rmse:0.07213
[9]	validation_0-rmse:0.07226
[10]	validation_0-rmse:0.07207
[11]	validation_0-rmse:0.07228
[12]	validation_0-rmse:0.07209
[13]	validation_0-rmse:0.07212
[14]	validation_0-rmse:0.07225
[15]	validation_0-rmse:0.07261
[16]	validation_0-rmse:0.07249
[17]	validation_0-rmse:0.07254
[18]	validation_0-rmse:0.07240
[19]	validation_0-rmse:0.07247
[20]	validation_0-rmse:0.07264
[21]	validation_0-rmse:0.07265
[22]	validation_0-rmse:0.07279
[23]	validation_0-rmse:0.07282
[24]	validation_0-rmse:0.07291
[25]	validation_0-rmse:0.07290
[26]	validation_0-rmse:0.07302
[27]	validation_0-rmse:0.07307
[28]	validation_0-rmse:0.07306
[29]	validation_0-rmse:0.07328
[30]	validation_0-rmse:0.07335
[31]	validation_0-rmse:0.07338
[32]	validation_0-



TypeError: ('Unsupported type for weight', "<class 'int'>")

In [17]:
parquet_file_path = r"F:\Data Science\Datasets\AirBnB Reviews Regression Inputs\combined_processed_reviews.parquet"
csv_file_path = r"F:\Data Science\Datasets\AirBnB Reviews Regression Inputs\combined_processed_reviews.csv"

file_type = 'csv'
if file_type == 'parquet':
    file_path = r"F:\Data Science\Datasets\AirBnB Reviews Regression Inputs\combined_processed_reviews.parquet"
if file_type == 'csv':
    file_path = r"F:\Data Science\Datasets\AirBnB Reviews Regression Inputs\combined_processed_reviews.csv"

read_batch_size = 10000

xgb_model = XGBRegressor(eval_metric='rmse',early_stopping_rounds=25)

tfidf_vectorizer = TfidfVectorizer(analyzer='word',ngram_range=(1,4))

batch_size = 2000

for i, batch_df in enumerate(read_data_in_batches(file_path, read_batch_size=read_batch_size,file_type=file_type)):
    X_train = batch_df['cleaned_text']  # Adjust column name accordingly
    y_train = batch_df['review_scores_rating']  # Adjust column name accordingly

    # TF-IDF vectorization
    X_train_tfidf = tfidf_vectorizer.fit_transform(X_train)


    if i == 0:
        xgb_model.fit(X_train_tfidf, y_train, eval_set=[(X_train_tfidf, y_train)], verbose=True)
    else:
        xgb_model.fit(X_train_tfidf, y_train, xgb_model.get_booster().best_iteration, eval_set=[(X_train_tfidf, y_train)], verbose=True)

    print(f"Batch {i+1} trained")

# Final evaluation on the entire dataset
# Load entire dataset into memory for evaluation
# Adjust this part according to your memory constraints
#X, y = pd.read_csv(file_path).drop(columns=['review_scores_rating']).values, pd.read_csv(file_path)['review_scores_rating'].values
df_revs = pd.read_csv(file_path)
X = df_revs['cleaned_text']
y = df_revs['review_scores_rating']

X_test_tfidf = tfidf_vectorizer.transform(X)

y_pred = xgb_model.predict(X_test)
final_rmse = np.sqrt(mean_squared_error(y_test, y_pred))
print(f"Final RMSE: {final_rmse}")

[0]	validation_0-rmse:0.07453
[1]	validation_0-rmse:0.06747
[2]	validation_0-rmse:0.06320
[3]	validation_0-rmse:0.06051
[4]	validation_0-rmse:0.05838
[5]	validation_0-rmse:0.05698
[6]	validation_0-rmse:0.05571
[7]	validation_0-rmse:0.05483
[8]	validation_0-rmse:0.05411
[9]	validation_0-rmse:0.05346
[10]	validation_0-rmse:0.05283
[11]	validation_0-rmse:0.05230
[12]	validation_0-rmse:0.05163
[13]	validation_0-rmse:0.05089
[14]	validation_0-rmse:0.05048
[15]	validation_0-rmse:0.04998
[16]	validation_0-rmse:0.04962
[17]	validation_0-rmse:0.04925
[18]	validation_0-rmse:0.04864
[19]	validation_0-rmse:0.04815
[20]	validation_0-rmse:0.04784
[21]	validation_0-rmse:0.04743
[22]	validation_0-rmse:0.04709
[23]	validation_0-rmse:0.04680
[24]	validation_0-rmse:0.04652
[25]	validation_0-rmse:0.04620
[26]	validation_0-rmse:0.04589
[27]	validation_0-rmse:0.04558
[28]	validation_0-rmse:0.04536
[29]	validation_0-rmse:0.04502
[30]	validation_0-rmse:0.04473
[31]	validation_0-rmse:0.04443
[32]	validation_0-



TypeError: ('Unsupported type for weight', "<class 'int'>")

In [None]:
parquet_file_path = r"F:\Data Science\Datasets\AirBnB Reviews Regression Inputs\combined_processed_reviews.parquet"
csv_file_path = r"F:\Data Science\Datasets\AirBnB Reviews Regression Inputs\combined_processed_reviews.csv"

file_type = 'csv'
if file_type == 'parquet':
    file_path = r"F:\Data Science\Datasets\AirBnB Reviews Regression Inputs\combined_processed_reviews.parquet"
if file_type == 'csv':
    file_path = r"F:\Data Science\Datasets\AirBnB Reviews Regression Inputs\combined_processed_reviews.csv"

read_batch_size = 10000

xgb_model = XGBRegressor(eval_metric='rmse')

tfidf_vectorizer = TfidfVectorizer(analyzer='word',ngram_range=(1,4))

#batch_size = 1000

for i, batch_df in enumerate(read_data_in_batches(file_path, read_batch_size=read_batch_size,file_type=file_type)):
    X = batch_df['cleaned_text']  # Adjust column name accordingly
    y = batch_df['review_scores_rating']  # Adjust column name accordingly

    X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.2,random_state=42)

    # TF-IDF vectorization
    X_train_tfidf = tfidf_vectorizer.fit_transform(X_train)
    X_test_tfidf = tfidf_vectorizer.transform(X_test)

    xgb_model.fit(X_train_tfidf, y_train,verbose=True)

    #if i == 0:
    #    xgb_model.fit(X_train_tfidf, y_train,verbose=True)
    #else:
    #    xgb_model.fit(X_train_tfidf, y_train, xgb_model.get_booster().best_iteration, verbose=True)

    print(f"Batch {i+1} trained")

    y_pred = xgb_model.predict(X_test_tfidf)
    rmse_batch = np.sqrt(mean_squared_error(y_test, y_pred))
    print(f"Batch RMSE: {rmse_batch}")

# Final evaluation on the entire dataset
# Load entire dataset into memory for evaluation
# Adjust this part according to your memory constraints
#X, y = pd.read_csv(file_path).drop(columns=['review_scores_rating']).values, pd.read_csv(file_path)['review_scores_rating'].values
df_revs = pd.read_csv(file_path)
X = df_revs['cleaned_text']
y = df_revs['review_scores_rating']

X_test_tfidf = tfidf_vectorizer.transform(X)

y_pred = xgb_model.predict(X_test)
final_rmse = np.sqrt(mean_squared_error(y_test, y_pred))
print(f"Final RMSE: {final_rmse}")

Batch 1 trained
Batch RMSE: 0.058871860893491425
Batch 2 trained
Batch RMSE: 0.10336071046890884
Batch 3 trained
Batch RMSE: 0.10785371048872416
Batch 4 trained
Batch RMSE: 0.08984109708062324
Batch 5 trained
Batch RMSE: 0.06392823811740111
Batch 6 trained
Batch RMSE: 0.10673308524501696
Batch 7 trained
Batch RMSE: 0.10355280569407276
Batch 8 trained
Batch RMSE: 0.06755946618097375
Batch 9 trained
Batch RMSE: 0.07644626886417555
Batch 10 trained
Batch RMSE: 0.09044971967136727
Batch 11 trained
Batch RMSE: 0.0680900917779635
Batch 12 trained
Batch RMSE: 0.06174732356730993
Batch 13 trained
Batch RMSE: 0.06943409577146745
Batch 14 trained
Batch RMSE: 0.08324829279152685
Batch 15 trained
Batch RMSE: 0.09523334395094411
Batch 16 trained
Batch RMSE: 0.07925144945996636
Batch 17 trained
Batch RMSE: 0.056612941218562945
Batch 18 trained
Batch RMSE: 0.09118223565677497
Batch 19 trained
Batch RMSE: 0.10079731275344894
Batch 20 trained
Batch RMSE: 0.08093502363086429
Batch 21 trained
Batch RMSE:

In [6]:
parquet_file_path = r"F:\Data Science\Datasets\AirBnB Reviews Regression Inputs\combined_processed_reviews.parquet"
csv_file_path = r"F:\Data Science\Datasets\AirBnB Reviews Regression Inputs\combined_processed_reviews.csv"

file_type = 'csv'
if file_type == 'parquet':
    file_path = r"F:\Data Science\Datasets\AirBnB Reviews Regression Inputs\combined_processed_reviews.parquet"
if file_type == 'csv':
    file_path = r"F:\Data Science\Datasets\AirBnB Reviews Regression Inputs\combined_processed_reviews.csv"

read_batch_size = 1000
batch_size = 500

xgb_model = XGBRegressor(device='cuda',tree_method='hist',eval_metric='rmse')

tfidf_vectorizer = TfidfVectorizer(analyzer='word',ngram_range=(1,4))


for b, batch_df in enumerate(read_data_in_batches(file_path=file_path, read_batch_size=read_batch_size, file_type=file_type)):
    X = batch_df['cleaned_text']
    y = batch_df['review_scores_rating']

    X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.2,random_state=42)
    #X_train, X_val, y_train, y_val = train_test_split(X_train,y_train,test_size=0.2,random_state=42)

    X_train_tfidf = tfidf_vectorizer.fit_transform(X_train)
    X_test_tfidf = tfidf_vectorizer.transform(X_test)
    #X_val_tfidf = tfidf_vectorizer.transform(X_val)

    X_test_gpu = cp.array(X_test_tfidf.toarray())
    #X_val_gpu = cp.array(X_val_tfidf.toarray())
    y_test_gpu = cp.array(y_test)
    #y_val_gpu = cp.array(y_val)


    num_batches = int(np.ceil(X_train_tfidf.shape[0] / batch_size))

    start_idx = 0
    end_idx = min(batch_size, X_train_tfidf.shape[0])

    X_train_batch_gpu = cp.array(X_train_tfidf[start_idx:end_idx].toarray())
    y_train_batch_gpu = cp.array(y_train[start_idx:end_idx])

    if b == 0:
        xgb_model.fit(X_train_batch_gpu, y_train_batch_gpu,verbose=True)
    else:
        pass

    for i in range(1,num_batches):
        #print("Beginning to iterate over batches")
        start_idx = i * batch_size
        end_idx = min((i+1) * batch_size, X_train_tfidf.shape[0])

        X_train_batch_gpu = cp.array(X_train_tfidf[start_idx:end_idx].toarray())
        y_train_batch_gpu = cp.array(y_train[start_idx:end_idx])

        xgb_model.fit(X_train_batch_gpu,y_train_batch_gpu,verbose=True)

        y_pred_gpu = xgb_model.predict(X_test_gpu)

        rmse = np.sqrt(mean_squared_error(y_test_gpu.get(), y_pred_gpu))
        print(rmse)
        #print(f"Batch {i+1}/{num_batches} - RMSE: {rmse}")
    print(f"Batch {b+1/len(batch_df)*100} - RMSE: {rmse}")
    #print('MODEL EVAL FOR THIS BATCH')
    y_pred_gpu = xgb_model.predict(X_test_gpu)
    final_rmse = np.sqrt(mean_squared_error(y_test_gpu.get(), y_pred_gpu))
    #print(final_rmse)

if file_type == 'csv':
    df_revs = pd.read_csv(file_path)
if file_type == 'parquet':
    df_revs = pd.read_parquet(file_path)

X = df_revs['cleaned_text']
y = df_revs['review_scores_rating']

X_test_tfidf = tfidf_vectorizer.transform(X)
X_test_gpu = cp.array(X_test_tfidf.toarray())
y_test_gpu = cp.array(y)

y_pred_gpu = xgb_model.predict(X_test_gpu)
final_rmse = np.sqrt(mean_squared_error(y_test_gpu.get(), y_pred_gpu))
print(f"Final RMSE: {final_rmse}")

0.0030656461473736207
Batch 0.1 - RMSE: 0.0030656461473736207
0.029822712686545464
Batch 1.1 - RMSE: 0.029822712686545464
1.3351440397713077e-07
Batch 2.1 - RMSE: 1.3351440397713077e-07
0.028095447761404015
Batch 3.1 - RMSE: 0.028095447761404015
0.02983339884311707
Batch 4.1 - RMSE: 0.02983339884311707
0.052110878831602085
Batch 5.1 - RMSE: 0.052110878831602085
0.020297288958366192
Batch 6.1 - RMSE: 0.020297288958366192
0.0940410952372377
Batch 7.1 - RMSE: 0.0940410952372377
0.08776180188719572
Batch 8.1 - RMSE: 0.08776180188719572
0.036791722090518925
Batch 9.1 - RMSE: 0.036791722090518925
0.2324735774878032
Batch 10.1 - RMSE: 0.2324735774878032
0.05089731078130077
Batch 11.1 - RMSE: 0.05089731078130077
0.02963053453456399
Batch 12.1 - RMSE: 0.02963053453456399
0.02717858484575979
Batch 13.1 - RMSE: 0.02717858484575979
0.021683895954929033
Batch 14.1 - RMSE: 0.021683895954929033
0.06851429477027829
Batch 15.1 - RMSE: 0.06851429477027829
0.006289642322411041
Batch 16.1 - RMSE: 0.006289

PermissionError: [Errno 13] Permission denied