In [89]:
import os
import pandas as pd
from datetime import datetime

# Writes the cleaned frame to 'cleaned_data.csv' unless another path is given.
def clean_data(df, output_file='cleaned_data.csv'):
    """Replace the ``output_date`` column with a day-of-month ``day`` column.

    The input frame is not modified: the cleaning is applied to a derived
    frame, so calling this function again on the same ``df`` keeps working.

    Args:
        df (pd.DataFrame): A dataframe to clean. It must contain an
            ``output_date`` column that ``pd.to_datetime`` can parse.
        output_file (str): Path of the CSV file the cleaned frame is written
            to. The index is written as the first column.

    Returns:
        pd.DataFrame: a new dataframe without ``output_date`` and with an
        integer ``day`` column holding the day of the month.
    """
    parsed_dates = pd.to_datetime(df["output_date"])

    # assign/drop both return new frames, which keeps the caller's df intact.
    cleaned = df.assign(day=parsed_dates.dt.day).drop(columns="output_date")

    # Saves a copy
    cleaned.to_csv(output_file)

    return cleaned

if __name__ == "__main__":
    # Raw input file, resolved relative to the current working directory.
    train_file = 'output_data.csv'

    if not os.path.exists(train_file):
        print(f'File not found {train_file}')
    else:
        # Load the raw data, clean it, and report the shape before and after.
        df = pd.read_csv(train_file)
        print(f'Original Data: {df.shape}')
        cleaned_df = clean_data(df)
        print(f'After Cleaning: {cleaned_df.shape}')

Original Data: (19710, 9)
After Cleaning: (19710, 9)


In [90]:
# 'k' is the per-row difference between price and cost; it stays on `df`
# as a helper column and is only removed from the derived `data` frame.
df['k'] = df['output_own_price'].sub(df['output_own_cost'])
df['profits'] = df['k'].mul(df['output_own_sales'])
data = df.drop(columns='k')

# Target is the price column; every remaining column of `data` is a feature.
# NOTE(review): 'profits' is computed from 'output_own_price', so X contains
# information derived from y - confirm this is intended before modelling on X.
y = data["output_own_price"]
X = data.drop(columns="output_own_price")

In [91]:
import os
import pandas as pd
from datetime import datetime
from sklearn.model_selection import train_test_split,GridSearchCV,RandomizedSearchCV
from sklearn.metrics import mean_squared_error,r2_score




def create_train_test_data(dataset):
    """Split ``dataset`` 80/20 into disjoint train and test CSV files.

    A reproducible random 80% sample (``random_state=30``) is written to
    ``train.csv``; the rows that were not sampled are written to ``test.csv``.
    Both files are written without the index, and the shape of each split is
    printed.

    Args:
        dataset (pd.DataFrame): the full dataset to split. Its index is
            ignored; rows are identified by position.

    Returns:
        None
    """
    # Work on positional labels so every row has a unique label, whatever
    # index the caller's frame carries.
    rows = dataset.reset_index(drop=True)

    sampled = rows.sample(frac=0.8, random_state=30)

    # The hold-out rows must be selected with the sampled labels *before* the
    # index is reset. Resetting first turns the labels into 0..n_train-1, and
    # dropping those just keeps the tail of the dataset, which mostly overlaps
    # the training sample.
    held_out = rows.drop(sampled.index)

    data_train = sampled.reset_index(drop=True)
    data_test = held_out.reset_index(drop=True)

    # save the data
    data_train.to_csv('train.csv', index=False)
    data_test.to_csv('test.csv', index=False)

    print(f"Train data for modeling: {data_train.shape}")
    print(f"Test data for predictions: {data_test.shape}")
    

    # Loads the data for the model training
# NOTE(review): 'train.csv' and 'test.csv' must already exist on disk here.
# In the code visible in this notebook they are only written by
# create_train_test_data, which is first called from main() in a later cell.
train = pd.read_csv('train.csv', keep_default_na=False)
y_train = train['output_own_price']
x_train = train.drop('output_own_price', axis=1)

# Test split, separated into target and features the same way.
test = pd.read_csv('test.csv', keep_default_na=False)
y_test = test['output_own_price']
x_test = test.drop('output_own_price', axis=1)


In [92]:
import plotly.express as px
from xgboost import XGBRegressor
from catboost import CatBoostRegressor
from lightgbm import LGBMRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import AdaBoostRegressor
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import mean_squared_error
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import Pipeline
import pickle
import numpy as np




def train_model(x_train, y_train):
    """Fit every candidate regressor on the given training data.

    Each estimator is created with its default settings and fitted on the
    arguments passed in, not on module-level variables.

    Args:
        x_train: feature matrix accepted by each estimator's ``fit``.
        y_train: target values aligned with ``x_train``.

    Returns:
        list: the fitted estimators, in the order AdaBoost, decision tree,
        random forest, XGBoost, LightGBM, CatBoost.
    """
    print("Training the model ...")
    # fit() returns the fitted estimator itself, so the list holds the models.
    return [
        AdaBoostRegressor().fit(x_train, y_train),
        DecisionTreeRegressor().fit(x_train, y_train),
        RandomForestRegressor().fit(x_train, y_train),
        XGBRegressor().fit(x_train, y_train),
        LGBMRegressor().fit(x_train, y_train),
        CatBoostRegressor().fit(x_train, y_train),
    ]


# Fit once at module level: the code below reads `models` and `xgb` directly.
models = train_model(x_train, y_train)
ada, dtc, rf, xgb, lgb, catbost = models



def ML(y,models):
    """Score one fitted estimator on the module-level training split.

    Args:
        y: Unused. It is accepted but never read by this function.
        models: A single fitted estimator exposing ``score(X, y)``, despite
            the plural name.

    Returns:
        The value of ``models.score(x_train, y_train)``, where ``x_train`` and
        ``y_train`` are module-level variables. For scikit-learn style
        regressors this is documented as the R^2 coefficient.
    """
    return models.score(x_train, y_train)

# Print each fitted estimator followed by its ML() score. The first argument
# to ML is a placeholder string that ML does not read.
for estimator in models:
    print(estimator, "Algorithm succed rate :", ML("survived", estimator))


def export_model(model, pkl_path='model.pkl'):
    """Serialize ``model`` to disk with pickle.

    Args:
        model: any picklable object, typically a fitted estimator.
        pkl_path (str): destination file. Defaults to ``'model.pkl'`` in the
            current working directory, so existing callers are unaffected.

    Returns:
        None
    """
    # Save the model
    with open(pkl_path, 'wb') as file:
        pickle.dump(model, file)

    # Report only after the file has been closed.
    print(f"Model saved at {pkl_path}")

def main():
    """Run the pipeline: split the cleaned data, train, and pickle the result.

    Reads 'cleaned_data.csv', writes 'train.csv' and 'test.csv' through
    create_train_test_data, reloads both splits, passes the training split to
    train_model and hands whatever it returns to export_model. The test split
    is loaded and separated into features and target but is not used further
    in this function.
    """
    target_column = 'output_own_price'

    # Load the whole data; the first CSV column is the saved index.
    data = pd.read_csv('cleaned_data.csv', keep_default_na=False, index_col=0)

    # Creates train.csv and test.csv
    create_train_test_data(data)

    # Training split: features are every column except the target.
    train = pd.read_csv('train.csv', keep_default_na=False)
    x_train, y_train = train.drop(columns=[target_column]), train[target_column]

    # Test split, separated the same way.
    test = pd.read_csv('test.csv', keep_default_na=False)
    x_test, y_test = test.drop(columns=[target_column]), test[target_column]

    # Train, then persist the returned object.
    model = train_model(x_train, y_train)
    export_model(model)


if __name__ == '__main__':
    main()

Learning rate set to 0.063304
0:	learn: 0.1914585	total: 4.87ms	remaining: 4.87s
1:	learn: 0.1807181	total: 9.45ms	remaining: 4.72s
2:	learn: 0.1707479	total: 14.1ms	remaining: 4.7s
3:	learn: 0.1613408	total: 18.5ms	remaining: 4.6s
4:	learn: 0.1528139	total: 22.7ms	remaining: 4.52s
5:	learn: 0.1446823	total: 26.9ms	remaining: 4.45s
6:	learn: 0.1372005	total: 31.9ms	remaining: 4.52s
7:	learn: 0.1302960	total: 36.3ms	remaining: 4.5s
8:	learn: 0.1238555	total: 40.3ms	remaining: 4.44s
9:	learn: 0.1178840	total: 44.7ms	remaining: 4.43s
10:	learn: 0.1122533	total: 49.1ms	remaining: 4.41s
11:	learn: 0.1071274	total: 53.2ms	remaining: 4.38s
12:	learn: 0.1023937	total: 57.1ms	remaining: 4.34s
13:	learn: 0.0981583	total: 61.9ms	remaining: 4.36s
14:	learn: 0.0941506	total: 65.8ms	remaining: 4.32s
15:	learn: 0.0904605	total: 70.5ms	remaining: 4.34s
16:	learn: 0.0871756	total: 74.7ms	remaining: 4.32s
17:	learn: 0.0840772	total: 78.8ms	remaining: 4.3s
18:	learn: 0.0812838	total: 84.1ms	remaining: 4.

196:	learn: 0.0445884	total: 867ms	remaining: 3.53s
197:	learn: 0.0445624	total: 871ms	remaining: 3.53s
198:	learn: 0.0445465	total: 877ms	remaining: 3.53s
199:	learn: 0.0444839	total: 882ms	remaining: 3.53s
200:	learn: 0.0444422	total: 886ms	remaining: 3.52s
201:	learn: 0.0443799	total: 891ms	remaining: 3.52s
202:	learn: 0.0443569	total: 896ms	remaining: 3.52s
203:	learn: 0.0443392	total: 900ms	remaining: 3.51s
204:	learn: 0.0442665	total: 905ms	remaining: 3.51s
205:	learn: 0.0442035	total: 910ms	remaining: 3.51s
206:	learn: 0.0441879	total: 914ms	remaining: 3.5s
207:	learn: 0.0441571	total: 919ms	remaining: 3.5s
208:	learn: 0.0441220	total: 923ms	remaining: 3.49s
209:	learn: 0.0441108	total: 928ms	remaining: 3.49s
210:	learn: 0.0440321	total: 932ms	remaining: 3.49s
211:	learn: 0.0439533	total: 937ms	remaining: 3.48s
212:	learn: 0.0439231	total: 941ms	remaining: 3.48s
213:	learn: 0.0438813	total: 946ms	remaining: 3.47s
214:	learn: 0.0438442	total: 950ms	remaining: 3.47s
215:	learn: 0.

391:	learn: 0.0379535	total: 1.74s	remaining: 2.71s
392:	learn: 0.0379142	total: 1.75s	remaining: 2.7s
393:	learn: 0.0379068	total: 1.75s	remaining: 2.7s
394:	learn: 0.0378326	total: 1.76s	remaining: 2.69s
395:	learn: 0.0377594	total: 1.76s	remaining: 2.69s
396:	learn: 0.0377247	total: 1.77s	remaining: 2.68s
397:	learn: 0.0377017	total: 1.77s	remaining: 2.68s
398:	learn: 0.0376722	total: 1.77s	remaining: 2.67s
399:	learn: 0.0376438	total: 1.78s	remaining: 2.67s
400:	learn: 0.0376095	total: 1.78s	remaining: 2.67s
401:	learn: 0.0375414	total: 1.79s	remaining: 2.66s
402:	learn: 0.0374873	total: 1.79s	remaining: 2.66s
403:	learn: 0.0374512	total: 1.8s	remaining: 2.65s
404:	learn: 0.0374060	total: 1.8s	remaining: 2.65s
405:	learn: 0.0373903	total: 1.81s	remaining: 2.64s
406:	learn: 0.0373761	total: 1.81s	remaining: 2.64s
407:	learn: 0.0373638	total: 1.81s	remaining: 2.63s
408:	learn: 0.0373533	total: 1.82s	remaining: 2.63s
409:	learn: 0.0373350	total: 1.82s	remaining: 2.62s
410:	learn: 0.03

559:	learn: 0.0332504	total: 2.45s	remaining: 1.93s
560:	learn: 0.0332051	total: 2.46s	remaining: 1.92s
561:	learn: 0.0331812	total: 2.46s	remaining: 1.92s
562:	learn: 0.0331705	total: 2.47s	remaining: 1.92s
563:	learn: 0.0331595	total: 2.47s	remaining: 1.91s
564:	learn: 0.0331475	total: 2.48s	remaining: 1.91s
565:	learn: 0.0331189	total: 2.48s	remaining: 1.9s
566:	learn: 0.0330705	total: 2.49s	remaining: 1.9s
567:	learn: 0.0330597	total: 2.49s	remaining: 1.9s
568:	learn: 0.0330523	total: 2.5s	remaining: 1.89s
569:	learn: 0.0330104	total: 2.5s	remaining: 1.89s
570:	learn: 0.0329987	total: 2.51s	remaining: 1.88s
571:	learn: 0.0329536	total: 2.51s	remaining: 1.88s
572:	learn: 0.0329463	total: 2.52s	remaining: 1.87s
573:	learn: 0.0329401	total: 2.52s	remaining: 1.87s
574:	learn: 0.0329201	total: 2.52s	remaining: 1.86s
575:	learn: 0.0329104	total: 2.53s	remaining: 1.86s
576:	learn: 0.0329042	total: 2.53s	remaining: 1.86s
577:	learn: 0.0329008	total: 2.54s	remaining: 1.85s
578:	learn: 0.032

720:	learn: 0.0298492	total: 3.16s	remaining: 1.22s
721:	learn: 0.0298262	total: 3.17s	remaining: 1.22s
722:	learn: 0.0297867	total: 3.17s	remaining: 1.21s
723:	learn: 0.0297453	total: 3.17s	remaining: 1.21s
724:	learn: 0.0297333	total: 3.18s	remaining: 1.21s
725:	learn: 0.0297286	total: 3.18s	remaining: 1.2s
726:	learn: 0.0297232	total: 3.19s	remaining: 1.2s
727:	learn: 0.0296831	total: 3.19s	remaining: 1.19s
728:	learn: 0.0296680	total: 3.19s	remaining: 1.19s
729:	learn: 0.0296302	total: 3.2s	remaining: 1.18s
730:	learn: 0.0296081	total: 3.2s	remaining: 1.18s
731:	learn: 0.0295884	total: 3.21s	remaining: 1.17s
732:	learn: 0.0295525	total: 3.21s	remaining: 1.17s
733:	learn: 0.0295297	total: 3.21s	remaining: 1.16s
734:	learn: 0.0295015	total: 3.22s	remaining: 1.16s
735:	learn: 0.0294841	total: 3.22s	remaining: 1.16s
736:	learn: 0.0294752	total: 3.23s	remaining: 1.15s
737:	learn: 0.0294661	total: 3.23s	remaining: 1.15s
738:	learn: 0.0294310	total: 3.23s	remaining: 1.14s
739:	learn: 0.02

884:	learn: 0.0267191	total: 3.87s	remaining: 502ms
885:	learn: 0.0266867	total: 3.87s	remaining: 498ms
886:	learn: 0.0266503	total: 3.88s	remaining: 494ms
887:	learn: 0.0266389	total: 3.88s	remaining: 489ms
888:	learn: 0.0266317	total: 3.88s	remaining: 485ms
889:	learn: 0.0266195	total: 3.89s	remaining: 481ms
890:	learn: 0.0266139	total: 3.89s	remaining: 476ms
891:	learn: 0.0265822	total: 3.9s	remaining: 472ms
892:	learn: 0.0265720	total: 3.9s	remaining: 467ms
893:	learn: 0.0265456	total: 3.9s	remaining: 463ms
894:	learn: 0.0265320	total: 3.91s	remaining: 458ms
895:	learn: 0.0265181	total: 3.91s	remaining: 454ms
896:	learn: 0.0265116	total: 3.92s	remaining: 450ms
897:	learn: 0.0264956	total: 3.92s	remaining: 445ms
898:	learn: 0.0264637	total: 3.92s	remaining: 441ms
899:	learn: 0.0264446	total: 3.93s	remaining: 436ms
900:	learn: 0.0264118	total: 3.93s	remaining: 432ms
901:	learn: 0.0264041	total: 3.94s	remaining: 428ms
902:	learn: 0.0264000	total: 3.94s	remaining: 423ms
903:	learn: 0.0

In [96]:
def predict_price(X_new, train_model):
    """Predict with the fitted model that was passed in.

    Args:
        X_new: feature rows accepted by the model's ``predict`` method.
        train_model: a fitted estimator exposing ``predict``. Inside this
            function the name refers to this argument, not to the
            module-level ``train_model`` function.

    Returns:
        Whatever ``train_model.predict(X_new)`` returns.
    """
    # Use the supplied model rather than a module-level variable, so the
    # result depends only on the arguments.
    return train_model.predict(X_new)


In [97]:
# Predict on the held-out features, passing the fitted XGBoost model.
y_pred = predict_price(X_new=x_test, train_model=xgb)

In [98]:
# Wrap the predictions in a frame and write them, index included, to CSV.
predictions = pd.DataFrame(data=y_pred)
predictions.to_csv(path_or_buf='predict.csv')