In [1]:
import os
import psycopg2
import pandas as pd
from dotenv import load_dotenv

In [2]:


# Load the previously defined environment variables before reading them below.
load_dotenv()

# Open the PostgreSQL connection from the PG_* environment variables.
connection = psycopg2.connect(
    host=os.environ.get('PG_HOST'),
    user=os.environ.get('PG_USER'),
    password=os.environ.get('PG_PASSWORD'),
    dbname=os.environ.get('PG_DATABASE'),
)

# Autocommit: write commands reach the database immediately after they run.
connection.autocommit = True

# Connectivity check: send a literal to the server and print what comes back.
cursor = connection.cursor()
cursor.execute('SELECT %s as connected;', ('Connection à postgres Réussie!',))
print(cursor.fetchone())

def postgresql_to_dataframe(conn, select_query, column_names):
    """
    Run a SELECT query and return its result set as a pandas DataFrame.

    Parameters
    ----------
    conn : DB-API connection
        Object exposing ``cursor()``; the cursor must provide ``execute``,
        ``fetchall`` and ``close``.
    select_query : str
        SQL text passed unchanged to ``cursor.execute``.
    column_names : list of str
        Labels assigned to the DataFrame columns, in result-set order.

    Returns
    -------
    pandas.DataFrame
        One row per fetched tuple, labelled with ``column_names``.
    int
        The sentinel ``1`` when ``execute`` raises; the error is printed,
        not re-raised. Callers must check for it before using the result
        as a DataFrame.

    Raises
    ------
    Exception
        Whatever ``fetchall`` or the DataFrame constructor raises is
        propagated to the caller.
    """
    cursor = conn.cursor()
    try:
        try:
            cursor.execute(select_query)
        except Exception as error:
            # Kept for backward compatibility: report the failure and return
            # the historical sentinel instead of raising.
            print("Error: %s" % error)
            return 1

        # fetchall() naturally yields a list of tuples.
        rows = cursor.fetchall()
    finally:
        # Close the cursor on every path, including a failing fetchall().
        cursor.close()

    # Only the column labels are needed to turn the tuples into a DataFrame.
    return pd.DataFrame(rows, columns=column_names)

# Labels applied to the result columns, in the same order as the SELECT list.
# The query aliases persons.user_id as "id", but the frame labels it "user_id".
column_names = [
    "user_id", "gender", "age", "height", "weight",
    "duration", "heart_rate", "body_temp", "calorie",
]
conn = connection

# Join every calories row to its person and load the result into a DataFrame.
df_db = postgresql_to_dataframe(
    conn,
    "SELECT persons.user_id as id, gender, age, height, weight, duration, heart_rate, body_temp,calorie FROM calories INNER JOIN persons ON calories.user_id = persons.user_id",
    column_names,
)
df_db.head()

('Connection à postgres Réussie!',)


Unnamed: 0,user_id,gender,age,height,weight,duration,heart_rate,body_temp,calorie
0,14733363,male,68,190.0,94.0,29.0,105.0,40.8,231.0
1,14861698,female,20,166.0,60.0,14.0,94.0,40.3,66.0
2,11179863,male,69,179.0,79.0,5.0,88.0,38.7,26.0
3,16180408,female,34,179.0,71.0,13.0,100.0,40.5,71.0
4,17771927,female,27,154.0,58.0,10.0,81.0,39.8,35.0


In [3]:
from sklearn.compose import make_column_transformer
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import MinMaxScaler

# Column transformer: one-hot encode 'gender', min-max scale three numeric
# columns, and pass every remaining column through untouched.
# NOTE(review): the `sparse` keyword appears to have been renamed
# `sparse_output` in newer scikit-learn releases - confirm against the
# installed version before upgrading.
ohe=OneHotEncoder(sparse=False)
scaler = MinMaxScaler()
ct=make_column_transformer((ohe,['gender']),
                      (scaler,  ['duration','heart_rate','body_temp']),remainder='passthrough')
# NOTE(review): data_ct is not read again by any visible later cell.
data_ct=ct.fit_transform(df_db)

# Same gender encoding through a sparse encoder, densified with .toarray().
ohe_True=OneHotEncoder(sparse=True)
ohe_with_sparse=ohe_True.fit_transform(df_db['gender'].values.reshape(-1,1)).toarray()
# Bare expression that is not the last statement of the cell, so it is not displayed.
ohe_with_sparse

# Encode gender as indicator columns named after the values themselves
# (empty prefix and separator); later cells work on this frame.
dataframe_app=pd.get_dummies(df_db,columns=['gender'],prefix='',prefix_sep='')

In [4]:
# Preview the first two rows of the frame produced by pd.get_dummies.
dataframe_app.head(2)

Unnamed: 0,user_id,age,height,weight,duration,heart_rate,body_temp,calorie,female,male
0,14733363,68,190.0,94.0,29.0,105.0,40.8,231.0,0,1
1,14861698,20,166.0,60.0,14.0,94.0,40.3,66.0,1,0


In [5]:
# The identifier carries no predictive signal; remove it from the frame.
# Rebinding the same name means re-running this cell raises KeyError.
dataframe_app = dataframe_app.drop(columns=['user_id'])

In [6]:
# Preview the first two rows after dropping user_id.
dataframe_app.head(2)

Unnamed: 0,age,height,weight,duration,heart_rate,body_temp,calorie,female,male
0,68,190.0,94.0,29.0,105.0,40.8,231.0,0,1
1,20,166.0,60.0,14.0,94.0,40.3,66.0,1,0


In [7]:
# Height_meters is height divided by 100 (assumes height is in cm - TODO confirm).
CM_PER_METER = 100
dataframe_app['Height_meters'] = dataframe_app['height'] / CM_PER_METER

In [8]:
# Body-mass index (IMC): weight divided by the square of the height in metres.
height_m_squared = dataframe_app['Height_meters'] ** 2
dataframe_app['calculated_IMC'] = dataframe_app['weight'] / height_m_squared

In [9]:
# Preview the first two rows with the new Height_meters and calculated_IMC columns.
dataframe_app.head(2)

Unnamed: 0,age,height,weight,duration,heart_rate,body_temp,calorie,female,male,Height_meters,calculated_IMC
0,68,190.0,94.0,29.0,105.0,40.8,231.0,0,1,1.9,26.038781
1,20,166.0,60.0,14.0,94.0,40.3,66.0,1,0,1.66,21.773842


In [10]:
# calculated_IMC was derived from these columns; drop them from the frame.
# Rebinding the same name means re-running this cell raises KeyError.
dataframe_app = dataframe_app.drop(columns=['height', 'weight', 'Height_meters'])

In [11]:
# Preview the first two rows after dropping height, weight and Height_meters.
dataframe_app.head(2)

Unnamed: 0,age,duration,heart_rate,body_temp,calorie,female,male,calculated_IMC
0,68,29.0,105.0,40.8,231.0,0,1,26.038781
1,20,14.0,94.0,40.3,66.0,1,0,21.773842


In [12]:
#dataframe_app.to_csv('dataframe_app.csv',index=False)

In [13]:
# MANIPULATION DES DONNÉES
import mlflow
import numpy as np
import pandas as pd
from pandas_profiling import ProfileReport
import matplotlib.pyplot as plt

# DIVISER LES DONNEES
from sklearn.model_selection import train_test_split

# METTRE A L ECHELLE
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import RobustScaler

# VALIDATION CROISÉE DES DONNÉES
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import RepeatedKFold
from sklearn.model_selection import KFold

# MODÈLES DE MACHINE LEARNING
from sklearn import linear_model
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Lasso
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import AdaBoostRegressor

# METRIQUES DE PERFORMANCE
from sklearn.metrics import r2_score
from sklearn.metrics import mean_absolute_error

# INTERPRETATION DES MODÈLES
from sklearn.model_selection import learning_curve
import  yellowbrick

# AUTRE
import pickle
import requests.models
import IPython, ipywidgets
from PIL import Image


In [14]:
# Modelling frame: the six features followed by the 'calorie' target.
model_columns = ['age', 'duration', 'heart_rate', 'female', 'male', 'calculated_IMC', 'calorie']
df = dataframe_app[model_columns]

In [15]:
# Preview the first two rows of the modelling frame.
df.head(2)

Unnamed: 0,age,duration,heart_rate,female,male,calculated_IMC,calorie
0,68,29.0,105.0,0,1,26.038781,231.0
1,20,14.0,94.0,1,0,21.773842,66.0


In [16]:
# Separate the target ('calorie') from the feature matrix.
y = df['calorie']
X = df.drop(columns=['calorie'])



In [17]:
# Hold out 20 % of the rows for evaluation; the fixed seed keeps the split stable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.20,
    random_state=1,
)

In [18]:
# Random forest of 300 trees, each limited to depth 5 and 20 leaf nodes.
# random_state is fixed (same seed as the train/test split) so that a
# Restart & Run All reproduces the same model, metrics and pickled file;
# without it every run produces a different forest.
clf = RandomForestRegressor(
    n_estimators=300,
    max_depth=5,
    max_leaf_nodes=20,
    random_state=1,
)

In [19]:
# Fit the forest on the training split; the returned estimator is the cell output.
clf.fit(X_train, y_train)

RandomForestRegressor(max_depth=5, max_leaf_nodes=20, n_estimators=300)

In [20]:
# Predict the target for the held-out test rows.
y_pred = clf.predict(X_test)

In [21]:
# Coefficient of determination of the predictions on the test split.
R2 = r2_score(y_true=y_test, y_pred=y_pred)

In [22]:
# Mean absolute error of the predictions on the test split.
MAE = mean_absolute_error(y_true=y_test, y_pred=y_pred)

In [23]:
# Display the mean absolute error computed on the test split.
MAE

8.756513052438239

In [24]:
# Persist the fitted estimator to disk as a binary pickle.
# NOTE(review): the file name says ADABOOST but clf is a RandomForestRegressor;
# the name is left unchanged because other code may load this exact path - confirm.
with open('ADABOOST_6_features_pkl', 'wb') as model_file:
    pickle.dump(clf, model_file)

In [25]:
# Show the first feature row (the input later passed to clf.predict for `test`).
X[0:1]

Unnamed: 0,age,duration,heart_rate,female,male,calculated_IMC
0,68,29.0,105.0,0,1,26.038781


In [26]:
# Spot-check the model on the first two feature rows, one row per call.
# These rows come from the full X, so they may have been part of the training split.
test = clf.predict(X.iloc[0:1])
test2 = clf.predict(X.iloc[1:2])

In [27]:
# Show the second feature row (the input used for `test2`).
X[1:2]

Unnamed: 0,age,duration,heart_rate,female,male,calculated_IMC
1,20,14.0,94.0,1,0,21.773842


In [28]:
# Display the prediction for the first feature row.
test

array([195.34395158])

In [29]:
# Display the prediction for the second feature row.
test2

array([69.08025001])

In [30]:
# Show the third feature row (the input used for `test3` in the next cell).
X[2:3]

Unnamed: 0,age,duration,heart_rate,female,male,calculated_IMC
2,69,5.0,88.0,0,1,24.65591


In [31]:
# Spot-check the model on the third feature row.
test3 = clf.predict(X.iloc[2:3])

In [32]:
# Display the prediction for the third feature row.
test3

array([16.99860767])