In [23]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score, accuracy_score
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, ExtraTreesRegressor, AdaBoostRegressor, BaggingRegressor, GradientBoostingRegressor
from sklearn.datasets import make_regression
from sklearn.linear_model import  TheilSenRegressor, RANSACRegressor, HuberRegressor, PassiveAggressiveRegressor, SGDRegressor
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.svm import SVR, NuSVR
from sklearn.neighbors import KNeighborsRegressor
import lightgbm as lgb
import xgboost as xgb
from catboost import CatBoostRegressor
from sklearn.kernel_ridge import KernelRidge
from sklearn.cluster import KMeans

### This is the Top 100 anime rating dataset 

In [24]:
# Read the dataset CSV; the path is relative to the notebook's working directory.
df = pd.read_csv('../input/top-100-anime-dataset-ratings/final_anime_dataset.csv')
# Display the first rows as the cell output for a quick look at the loaded columns.
df.head()

- I have used ```No. of episodes```, ```TV show``` and the rest of the Genre categories as the input features, with ```Rating``` as the output variable.

- Here I have created a function ```reg_func``` to make it easier to implement the machine learning algorithms

In [25]:
# Input features: the `TV show` column plus the listed `Genre: ...` columns.
# NOTE(review): the notebook text also names `No. of episodes` as an input
# feature, but that column is not selected here -- confirm which is intended.
X = df[['TV show','Genre: magic', 'Genre: adventure', 'Genre: psychological',
       'Genre: comedy', 'Genre: drama', 'Genre: romance', 'Genre: mystery',
       'Genre: action', 'Genre: fantasy']]
# Target variable.
Y = df['Rating']
# Hold out 20% of the rows for testing. The split is seeded so that every
# model below is scored on the same rows on every run; without a seed the
# reported MSE values change each time the notebook is executed.
x_train, x_test, y_train, y_test = train_test_split(
    X, Y, test_size=0.2, random_state=0
)
def reg_func(model):
    """Fit ``model`` on the training split and print its test-set MSE.

    Reads the notebook-level ``x_train``/``y_train`` for fitting and
    ``x_test``/``y_test`` for scoring. Nothing is returned; the score is
    only printed.
    """
    model.fit(x_train, y_train)
    mse = mean_squared_error(y_test, model.predict(x_test))
    print(f'Mean Squared Error: {mse!s}')


- ### Linear Regression

In [26]:

# Baseline: LinearRegression with default settings; reg_func fits it and prints the test MSE.
model = LinearRegression()
reg_func(model)


- ### TheilSen Regressor

In [27]:
# TheilSenRegressor can draw random sample subsets while fitting (see its
# `random_state` parameter), so it is seeded to keep the printed MSE
# reproducible between runs.
model = TheilSenRegressor(random_state=0)
reg_func(model)

- ### RANSAC Regressor

In [28]:
# RANSAC fits on randomly chosen subsets of the training rows, so the
# estimator is seeded to make the printed MSE reproducible between runs.
model = RANSACRegressor(random_state=0)
reg_func(model)

- ### Huber Regressor

In [29]:
# HuberRegressor with default settings; reg_func fits it and prints the test MSE.
model = HuberRegressor()
reg_func(model)

- ### Passive Aggressive Regressor

In [30]:
# PassiveAggressiveRegressor shuffles the training data between epochs by
# default, so it is seeded to make the printed MSE reproducible between runs.
model = PassiveAggressiveRegressor(random_state=0)
reg_func(model)

- ### Gaussian Process Regressor

In [31]:
# Gaussian process regression with the default kernel; normalize_y=True asks
# the estimator to normalize the target values before fitting.
model = GaussianProcessRegressor(normalize_y=True)
reg_func(model)

- ### Support Vector Machine

In [32]:
# Support vector regression with default hyperparameters; reg_func prints the test MSE.
model = SVR()
reg_func(model)

- ### NU Support Vector Regression

In [33]:
# Nu-parameterised support vector regression with default hyperparameters.
model = NuSVR()
reg_func(model)

- ### K-Nearest Neighbors Regressor

In [48]:
# k-nearest-neighbours regression: each prediction is based on the 5 nearest training samples.
model= KNeighborsRegressor(n_neighbors=5)
reg_func(model)

- ### Stochastic Gradient Descent

In [50]:
# SGD shuffles the training data each epoch, so the estimator is seeded to
# make the printed MSE reproducible between runs.
# NOTE(review): n_iter_no_change=750 is close to the default max_iter (1000),
# so early stopping rarely triggers and a ConvergenceWarning is possible --
# confirm this patience value is intentional.
model = SGDRegressor(n_iter_no_change=750, random_state=0)
reg_func(model)

- ### Kernel Ridge Regression

In [36]:
# Kernel ridge regression with a degree-2 polynomial kernel (coef0=2.5) and
# regularisation strength alpha=0.6.
model= KernelRidge(alpha=0.6, kernel='polynomial', degree=2, coef0=2.5)
reg_func(model)

- ### Decision Tree

In [37]:
# Single decision tree; random_state=0 fixes the estimator's internal randomness.
model=DecisionTreeRegressor(random_state=0)
reg_func(model)

- ### Random Forest

In [38]:

# Random forest of 20 trees with a fixed seed; note this cell binds the
# estimator to `regressor` rather than `model`.
regressor = RandomForestRegressor(n_estimators = 20, random_state = 0)
reg_func(regressor)


- ### Extra Trees

In [39]:
# Extra-trees ensemble of 20 trees with a fixed seed.
model= ExtraTreesRegressor(n_estimators=20,random_state=0)
reg_func(model)

- ### Bagging Regressor

In [40]:
# Bagging ensemble of 20 estimators (default base estimator) with a fixed seed.
model= BaggingRegressor(n_estimators=20,random_state=0)
reg_func(model)

## Boosting Techniques

- ### AdaBoost Regressor

In [41]:
# AdaBoost with at most 20 boosting stages and a fixed seed.
model=AdaBoostRegressor(n_estimators=20,random_state=0)
reg_func(model)

- ### XGBoost

In [42]:
# XGBoost with 20 boosting rounds and a fixed seed. gamma=1 sets the minimum
# loss reduction required to split, and subsample=0.1 trains each round on a
# 10% sample of the training rows.
# NOTE(review): with an 80/20 split of a ~100-row dataset that is only a
# handful of rows per round -- confirm subsample=0.1 is intended.
xgb_r = xgb.XGBRegressor(n_estimators = 20, random_state= 0,gamma=1,subsample=0.1)
reg_func(xgb_r)


- ### Gradient Boosting

In [43]:
# Gradient boosting with 20 stages, learning rate 0.1 and a fixed seed.
model=GradientBoostingRegressor(n_estimators = 20, random_state= 0,learning_rate=0.1)
reg_func(model)

- ## Evaluating Algorithms

- As shown above, the ```Mean Squared Error``` is lowest for the **XGBoost** algorithm, at 0.024. Hence it is the best-performing model for this dataset.
- The highest loss is for the **Decision Tree**. About 95% of the input features used for prediction are categorical, which makes the data sparse and reduces the efficiency of the tree-based models.
- This also explains why the linear regression models have an MSE in the range 0.02-0.04, while the tree-based models go up to 0.06.