Feature Selection => Backward Selection
Data Ingestion

In [3]:
# Silence every warning for the rest of the session.
# NOTE(review): this blanket filter also hides deprecation and numerical
# warnings raised by pandas / scikit-learn; consider narrowing it to the
# specific categories that are known to be noise.
from warnings import filterwarnings
filterwarnings('ignore')

In [4]:
import pandas as pd

In [5]:
import os
from pathlib import Path

# Folder that holds the Cars93 CSV files. The default is the original local
# location; set the CARS93_DATA_DIR environment variable to run the notebook
# on another machine without editing the code.
DATA_DIR = Path(os.environ.get("CARS93_DATA_DIR", r"D:\Machine-Learning\repository\Data_Processing"))

# Path-like object pointing at the training data (accepted by pd.read_csv).
path = DATA_DIR / "Cars93.csv"

In [6]:
# Read the raw data. Only empty strings and the literal "NA" count as missing;
# pandas' built-in NA markers are disabled.
# NOTE(review): presumably this keeps text values such as "None" as real
# categories instead of NaN - confirm against the CSV.
df = pd.read_csv(
    filepath_or_buffer=path,
    keep_default_na=False,
    na_values=["", "NA"],
)
df.head()

Unnamed: 0,id,Manufacturer,Model,Type,Min.Price,Price,Max.Price,MPG.city,MPG.highway,AirBags,...,Passengers,Length,Wheelbase,Width,Turn.circle,Rear.seat.room,Luggage.room,Weight,Origin,Make
0,1,Acura,Integra,Small,12.9,15.9,18.8,25,31,,...,5,177,102,68,37,26.5,11.0,2705,non-USA,Acura Integra
1,2,Acura,Legend,Midsize,29.2,33.9,38.7,18,25,Driver & Passenger,...,5,195,115,71,38,30.0,15.0,3560,non-USA,Acura Legend
2,3,Audi,90,Compact,25.9,29.1,32.3,20,26,Driver only,...,5,180,102,67,37,28.0,14.0,3375,non-USA,Audi 90
3,4,Audi,100,Midsize,30.8,37.7,44.6,19,26,,...,6,193,106,70,37,31.0,17.0,3405,non-USA,Audi 100
4,5,BMW,535i,Midsize,23.7,30.0,36.2,22,30,Driver only,...,4,186,109,69,39,27.0,13.0,3640,non-USA,BMW 535i


Perform basic data quality checks

In [7]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 93 entries, 0 to 92
Data columns (total 28 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   id                  93 non-null     int64  
 1   Manufacturer        93 non-null     object 
 2   Model               93 non-null     object 
 3   Type                93 non-null     object 
 4   Min.Price           93 non-null     float64
 5   Price               93 non-null     float64
 6   Max.Price           93 non-null     float64
 7   MPG.city            93 non-null     int64  
 8   MPG.highway         93 non-null     int64  
 9   AirBags             89 non-null     object 
 10  DriveTrain          93 non-null     object 
 11  Cylinders           93 non-null     object 
 12  EngineSize          93 non-null     float64
 13  Horsepower          93 non-null     int64  
 14  RPM                 93 non-null     int64  
 15  Rev.per.mile        93 non-null     int64  
 16  Man.trans.

In [8]:
# Missing-value count per column; show only the columns that have any.
m = df.isnull().sum()
m.loc[m.gt(0)]

AirBags            4
Rear.seat.room     2
Luggage.room      11
dtype: int64

In [9]:
df.duplicated().sum()

0

Drop insignificant columns

In [10]:
# Remove the `id` column before modelling (the section heading treats it as
# insignificant). Rebinding instead of `inplace=True`, together with
# `errors="ignore"`, keeps this cell safe to re-run: a second execution no
# longer raises KeyError because the column is already gone.
df = df.drop(columns="id", errors="ignore")

Separate X and Y

In [11]:
# Target: Weight, kept as a one-column DataFrame. Predictors: everything else.
Y = df.loc[:, ["Weight"]]
X = df.drop(columns=["Weight"])

In [12]:
X.head()

Unnamed: 0,Manufacturer,Model,Type,Min.Price,Price,Max.Price,MPG.city,MPG.highway,AirBags,DriveTrain,...,Fuel.tank.capacity,Passengers,Length,Wheelbase,Width,Turn.circle,Rear.seat.room,Luggage.room,Origin,Make
0,Acura,Integra,Small,12.9,15.9,18.8,25,31,,Front,...,13.2,5,177,102,68,37,26.5,11.0,non-USA,Acura Integra
1,Acura,Legend,Midsize,29.2,33.9,38.7,18,25,Driver & Passenger,Front,...,18.0,5,195,115,71,38,30.0,15.0,non-USA,Acura Legend
2,Audi,90,Compact,25.9,29.1,32.3,20,26,Driver only,Front,...,16.9,5,180,102,67,37,28.0,14.0,non-USA,Audi 90
3,Audi,100,Midsize,30.8,37.7,44.6,19,26,,Front,...,21.1,6,193,106,70,37,31.0,17.0,non-USA,Audi 100
4,BMW,535i,Midsize,23.7,30.0,36.2,22,30,Driver only,Rear,...,21.1,4,186,109,69,39,27.0,13.0,non-USA,BMW 535i


In [13]:
Y.head()

Unnamed: 0,Weight
0,2705
1,3560
2,3375
3,3405
4,3640


Data PreProcessing and Data Cleaning

In [14]:
from sklearn.preprocessing import StandardScaler, OrdinalEncoder
from sklearn.pipeline import make_pipeline
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer


In [15]:
# Split the predictor names by kind. Selecting on "number" rather than
# comparing against "object" means text columns stored with a dedicated string
# dtype are still treated as categorical instead of being sent to the
# mean-imputer / scaler branch. For the int64 / float64 / object columns shown
# by df.info() the two lists are the same as before.
con = list(X.select_dtypes(include="number").columns)
cat = list(X.select_dtypes(exclude="number").columns)

In [16]:
# Categorical branch for the feature-selection stage: fill gaps with the most
# frequent value, then replace each category by an integer code.
cat_pipe = make_pipeline(SimpleImputer(strategy="most_frequent"), OrdinalEncoder())

In [17]:
# Numeric branch: fill gaps with the column mean, then standardise.
con_pipe = make_pipeline(SimpleImputer(strategy="mean"), StandardScaler())

In [18]:
# Route each column list through its own pipeline. With pandas output the
# resulting columns carry a `cat__` / `con__` prefix.
pre = ColumnTransformer(
    transformers=[
        ("cat", cat_pipe, cat),
        ("con", con_pipe, con),
    ]
)
pre = pre.set_output(transform="pandas")

In [19]:
pre

In [20]:
# Learn the imputation / encoding / scaling parameters from X and apply them.
X_pre = pre.fit_transform(X=X)

In [21]:
X_pre

Unnamed: 0,cat__Manufacturer,cat__Model,cat__Type,cat__AirBags,cat__DriveTrain,cat__Cylinders,cat__Man.trans.avail,cat__Origin,cat__Make,con__Min.Price,...,con__RPM,con__Rev.per.mile,con__Fuel.tank.capacity,con__Passengers,con__Length,con__Wheelbase,con__Width,con__Turn.circle,con__Rear.seat.room,con__Luggage.room
0,0.0,48.0,3.0,2.0,1.0,1.0,1.0,1.0,0.0,-0.485787,...,1.717489,1.129530,-1.062184,-0.083243,-0.427186,-0.286932,-0.366184,-0.610436,-0.452197,-1.033015
1,0.0,55.0,2.0,0.0,1.0,3.0,1.0,1.0,1.0,1.388017,...,0.369586,0.005661,0.409445,-0.083243,0.812171,1.629649,0.431983,-0.298510,0.738090,0.396643
2,1.0,8.0,0.0,1.0,1.0,3.0,1.0,1.0,3.0,1.008658,...,0.369586,-0.105713,0.072197,-0.083243,-0.220626,-0.286932,-0.632239,-0.610436,0.057926,0.039228
3,1.0,0.0,2.0,1.0,1.0,3.0,1.0,1.0,2.0,1.571949,...,0.369586,0.410659,1.359872,0.884457,0.674465,0.302785,0.165927,-0.610436,1.078172,1.111472
4,2.0,5.0,2.0,1.0,2.0,1.0,1.0,1.0,4.0,0.755752,...,0.706562,0.430909,1.359872,-1.050944,0.192493,0.745073,-0.100128,0.013416,-0.282156,-0.318186
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
88,30.0,41.0,5.0,2.0,1.0,2.0,1.0,1.0,88.0,-0.060445,...,-1.315292,1.180155,1.359872,1.852158,0.261346,1.629649,0.698038,-0.298510,2.098419,0.000000
89,30.0,64.0,0.0,2.0,1.0,1.0,1.0,1.0,90.0,0.054512,...,0.875050,0.714407,0.562740,-0.083243,-0.220626,-0.139503,-0.632239,-1.234288,1.248213,0.039228
90,30.0,28.0,4.0,2.0,1.0,3.0,1.0,1.0,87.0,0.663786,...,0.875050,0.106911,0.562740,-1.050944,-1.666543,-1.024079,-0.898295,-0.922362,-0.622238,0.396643
91,31.0,2.0,0.0,1.0,2.0,1.0,1.0,1.0,91.0,0.537333,...,0.201098,-0.237337,-0.265051,-0.083243,0.467905,0.007926,-0.632239,-0.610436,0.568049,0.039228


Feature Selection => Backward Selection

In [22]:
from sklearn.feature_selection import SequentialFeatureSelector
from sklearn.linear_model import LinearRegression

# Outline of the backward-selection workflow. Each step is executed in its own
# cell below, so the outline is kept as a commented sketch rather than fitting
# the selector twice. The earlier draft did not parse (a stray "auto" after
# the closing parenthesis) and referenced get_feature_names_out without
# calling it.
#
#   base_model = LinearRegression()
#   for_sel = SequentialFeatureSelector(base_model, direction="backward",
#                                       n_features_to_select="auto")
#   for_sel.fit(X_pre, Y)
#   for_sel.get_feature_names_out()

In [23]:
# Backward elimination driven by a plain linear regression.
# NOTE(review): with n_features_to_select="auto" and no tolerance set, the
# selector is expected to keep half of the input columns - consistent with the
# 13 names reported further down; confirm against the installed version.
base_model = LinearRegression()
for_sel = SequentialFeatureSelector(
    estimator=base_model,
    n_features_to_select="auto",
    direction="backward",
)

In [24]:
# Run the selection on the ordinal-encoded / scaled predictors.
for_sel.fit(X=X_pre, y=Y)

In [25]:
# Names of the columns the selector kept (still prefixed with cat__ / con__).
imp_cols = for_sel.get_feature_names_out(input_features=None)
imp_cols

array(['cat__Manufacturer', 'cat__Type', 'cat__AirBags', 'cat__Cylinders',
       'cat__Origin', 'con__Price', 'con__MPG.highway', 'con__Horsepower',
       'con__RPM', 'con__Fuel.tank.capacity', 'con__Passengers',
       'con__Length', 'con__Wheelbase'], dtype=object)

In [26]:
len(imp_cols)

13

In [27]:
imp_cols

array(['cat__Manufacturer', 'cat__Type', 'cat__AirBags', 'cat__Cylinders',
       'cat__Origin', 'con__Price', 'con__MPG.highway', 'con__Horsepower',
       'con__RPM', 'con__Fuel.tank.capacity', 'con__Passengers',
       'con__Length', 'con__Wheelbase'], dtype=object)

In [28]:
imp_cols[0]

'cat__Manufacturer'

In [29]:
imp_cols[0].split("__")

['cat', 'Manufacturer']

In [30]:
imp_cols[0].split("__")[1]

'Manufacturer'

In [31]:
# Strip the transformer prefix ("cat__" / "con__") to recover the original
# column names. Splitting only once keeps names intact even if a column name
# itself contains a double underscore.
sel_cols = [col.split("__", maxsplit=1)[1] for col in imp_cols]
sel_cols

['Manufacturer',
 'Type',
 'AirBags',
 'Cylinders',
 'Origin',
 'Price',
 'MPG.highway',
 'Horsepower',
 'RPM',
 'Fuel.tank.capacity',
 'Passengers',
 'Length',
 'Wheelbase']

In [32]:
# Raw (un-encoded) values of the selected predictors.
X_sel = X.loc[:, sel_cols]
X_sel

Unnamed: 0,Manufacturer,Type,AirBags,Cylinders,Origin,Price,MPG.highway,Horsepower,RPM,Fuel.tank.capacity,Passengers,Length,Wheelbase
0,Acura,Small,,4,non-USA,15.9,31,140,6300,13.2,5,177,102
1,Acura,Midsize,Driver & Passenger,6,non-USA,33.9,25,200,5500,18.0,5,195,115
2,Audi,Compact,Driver only,6,non-USA,29.1,26,172,5500,16.9,5,180,102
3,Audi,Midsize,,6,non-USA,37.7,26,172,5500,21.1,6,193,106
4,BMW,Midsize,Driver only,4,non-USA,30.0,30,208,5700,21.1,4,186,109
...,...,...,...,...,...,...,...,...,...,...,...,...,...
88,Volkswagen,Van,,5,non-USA,19.7,21,109,4500,21.1,7,187,115
89,Volkswagen,Compact,,4,non-USA,20.0,30,134,5800,18.5,5,180,103
90,Volkswagen,Sporty,,6,non-USA,23.3,25,178,5800,18.5,4,159,97
91,Volvo,Compact,Driver only,4,non-USA,22.7,28,114,5400,15.8,5,190,104


Data preprocessing, step 2: one-hot encode and scale the selected features

In [33]:
from sklearn.preprocessing import OneHotEncoder

In [34]:
# Same categorical / numeric split as before, now on the selected columns.
# Selecting on "number" keeps text columns categorical even when they are
# stored with a dedicated string dtype rather than "object".
X_sel_con = list(X_sel.select_dtypes(include="number").columns)
X_sel_cat = list(X_sel.select_dtypes(exclude="number").columns)

In [35]:
# Categorical branch for the final model: most-frequent imputation followed by
# dense one-hot encoding; categories unseen during fit are ignored at
# transform time instead of raising.
cat_sel_pipe = make_pipeline(
    SimpleImputer(strategy="most_frequent"),
    OneHotEncoder(sparse_output=False, handle_unknown='ignore'),
)

In [36]:
# Numeric branch for the final model: mean imputation, then standardisation.
num_sel_pipe = make_pipeline(SimpleImputer(strategy="mean"), StandardScaler())

In [37]:
# Final preprocessor for the selected features; returns a pandas DataFrame.
pre1 = ColumnTransformer(
    transformers=[
        ("cat", cat_sel_pipe, X_sel_cat),
        ("con", num_sel_pipe, X_sel_con),
    ]
)
pre1 = pre1.set_output(transform='pandas')

In [38]:
pre1

In [39]:
# Fit the final preprocessor on the selected columns and transform them.
X_sel_pre = pre1.fit_transform(X=X_sel)
X_sel_pre

Unnamed: 0,cat__Manufacturer_Acura,cat__Manufacturer_Audi,cat__Manufacturer_BMW,cat__Manufacturer_Buick,cat__Manufacturer_Cadillac,cat__Manufacturer_Chevrolet,cat__Manufacturer_Chrylser,cat__Manufacturer_Chrysler,cat__Manufacturer_Dodge,cat__Manufacturer_Eagle,...,cat__Origin_USA,cat__Origin_non-USA,con__Price,con__MPG.highway,con__Horsepower,con__RPM,con__Fuel.tank.capacity,con__Passengers,con__Length,con__Wheelbase
0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,-0.375720,0.360925,-0.073484,1.717489,-1.062184,-0.083243,-0.427186,-0.286932
1,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,1.497844,-0.770514,1.078322,0.369586,0.409445,-0.083243,0.812171,1.629649
2,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,0.998227,-0.581941,0.540813,0.369586,0.072197,-0.083243,-0.220626,-0.286932
3,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,1.893374,-0.581941,0.540813,0.369586,1.359872,0.884457,0.674465,0.302785
4,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,1.091905,0.172352,1.231897,0.706562,1.359872,-1.050944,0.192493,0.745073
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
88,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,0.019810,-1.524806,-0.668585,-1.315292,1.359872,1.852158,0.261346,1.629649
89,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,0.051036,0.172352,-0.188665,0.875050,0.562740,-0.083243,-0.220626,-0.139503
90,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,0.394523,-0.770514,0.655993,0.875050,0.562740,-1.050944,-1.666543,-1.024079
91,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,0.332071,-0.204794,-0.572601,0.201098,-0.265051,-0.083243,0.467905,0.007926


Train test split

In [40]:
from sklearn.model_selection import train_test_split

In [41]:
# 80 / 20 split with a fixed seed so the partition is reproducible.
# NOTE(review): the feature selector and `pre1` were both fitted on all rows
# before this split, so the held-out rows have already influenced the selected
# columns and the scaling statistics. Splitting first and fitting on the
# training part only would give a cleaner test estimate.
xtrain, xtest, ytrain, ytest = train_test_split(
    X_sel_pre,
    Y,
    train_size=0.8,
    random_state=21,
)

In [42]:
xtrain.head()

Unnamed: 0,cat__Manufacturer_Acura,cat__Manufacturer_Audi,cat__Manufacturer_BMW,cat__Manufacturer_Buick,cat__Manufacturer_Cadillac,cat__Manufacturer_Chevrolet,cat__Manufacturer_Chrylser,cat__Manufacturer_Chrysler,cat__Manufacturer_Dodge,cat__Manufacturer_Eagle,...,cat__Origin_USA,cat__Origin_non-USA,con__Price,con__MPG.highway,con__Horsepower,con__RPM,con__Fuel.tank.capacity,con__Passengers,con__Length,con__Wheelbase
57,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,1.28967,-0.016221,-0.265452,-0.304365,-0.663618,-0.083243,-0.564892,0.155356
31,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,-0.979424,0.172352,-0.323043,2.054464,-1.062184,-0.083243,-0.840305,-0.87665
62,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,0.685966,-0.959087,1.116716,1.212025,0.716035,-0.083243,0.467905,0.450214
29,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,...,1.0,0.0,-0.021825,-0.204794,1.347077,0.87505,0.409445,0.884457,1.294143,1.334791
51,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,1.726835,-0.581941,1.27029,-1.146804,1.022624,0.884457,2.464647,1.924508


In [43]:
xtest.head()

Unnamed: 0,cat__Manufacturer_Acura,cat__Manufacturer_Audi,cat__Manufacturer_BMW,cat__Manufacturer_Buick,cat__Manufacturer_Cadillac,cat__Manufacturer_Chevrolet,cat__Manufacturer_Chrylser,cat__Manufacturer_Chrysler,cat__Manufacturer_Dodge,cat__Manufacturer_Eagle,...,cat__Origin_USA,cat__Origin_non-USA,con__Price,con__MPG.highway,con__Horsepower,con__RPM,con__Fuel.tank.capacity,con__Passengers,con__Length,con__Wheelbase
23,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,...,1.0,0.0,-0.85452,-0.016221,-0.975733,-0.809828,-0.816912,-0.083243,-0.771452,-1.024079
86,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,0.332071,-1.336233,-0.111878,-0.472853,0.961306,1.852158,0.261346,1.334791
91,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,0.332071,-0.204794,-0.572601,0.201098,-0.265051,-0.083243,0.467905,0.007926
21,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,...,1.0,0.0,1.039862,-0.581941,0.060893,-0.809828,-0.203734,0.884457,1.362996,0.892502
17,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,...,1.0,0.0,-0.073868,-0.581941,0.502419,-1.820755,1.942392,0.884457,2.120381,1.777079


In [44]:
ytrain.head()

Unnamed: 0,Weight
57,2920
31,2530
62,3730
29,3490
51,4055


In [45]:
ytest.head()

Unnamed: 0,Weight
23,2670
86,3785
91,2985
21,3570
17,3910


Build a model

In [46]:
# Ordinary least squares on the training partition; fit() returns the
# estimator, and the trailing expression keeps the cell's display.
model = LinearRegression().fit(xtrain, ytrain)
model

In [47]:
model.score(xtrain,ytrain)

0.9859914776613601

In [48]:
model.score(xtest,ytest)

0.9133161505760057

Model Evaluation

In [49]:
from sklearn.metrics import mean_squared_error,mean_absolute_error,r2_score

In [50]:
# Predictions on the training rows (used for the training-error metrics below).
ypred = model.predict(X=xtrain)

In [51]:
# Mean squared error on the training partition.
mse = mean_squared_error(y_true=ytrain, y_pred=ypred)
mse

4712.062020840985

In [52]:
# Root of the training MSE, i.e. the error in the same units as Weight.
rmse = mse ** 0.5
rmse

68.64446096256408

In [53]:
ytrain.head()

Unnamed: 0,Weight
57,2920
31,2530
62,3730
29,3490
51,4055


In [54]:
ypred[:5]

array([[2864.4457755 ],
       [2444.05827698],
       [3552.36775736],
       [3543.58581955],
       [4107.94678651]])

In [55]:
# Predictions on the held-out rows; preview the first five.
ypred_test = model.predict(X=xtest)
ypred_test[0:5]

array([[2632.46494611],
       [3911.75987976],
       [3223.30514299],
       [3664.75090238],
       [3852.99325493]])

In [56]:
ytest.head()

Unnamed: 0,Weight
23,2670
86,3785
91,2985
21,3570
17,3910


We can use this model for out-of-sample predictions, since the training R² score is about 98.6% and the testing R² score is about 91.3%.
Out-of-sample predictions

In [57]:
from pathlib import Path

# The sample file sits in the same folder as the training CSV, so its location
# is derived from `path` instead of repeating the absolute directory. Same
# missing-value markers as the training load.
xnew = pd.read_csv(Path(path).with_name("sample_cars93.csv"),
                   na_values=["","NA"],keep_default_na=False)

In [58]:
xnew.head()

Unnamed: 0,Manufacturer,Model,Type,Min.Price,Price,Max.Price,MPG.city,MPG.highway,AirBags,DriveTrain,...,Fuel.tank.capacity,Passengers,Length,Wheelbase,Width,Turn.circle,Rear.seat.room,Luggage.room,Origin,Make
0,Audi,100,Midsize,30.8,37.7,44.6,19,26,,Front,...,15.0,6,190,106,65,37,31.0,17.0,non-USA,Audi 100
1,Pontiac,Sunbird,Compact,9.4,11.1,12.8,23,31,,Front,...,15.2,5,181,101,66,39,25.0,13.0,USA,Pontiac Sunbird
2,Chevrolet,Lumina,Midsize,13.4,15.9,18.4,21,29,,Front,...,16.5,6,198,108,71,40,28.5,16.0,USA,Chevrolet Lumina
3,Mazda,RX-7,Sporty,32.5,32.5,32.5,17,25,Driver only,Rear,...,20.0,2,169,96,69,37,,,non-USA,Mazda RX-7
4,Volkswagen,Fox,Small,8.7,9.1,9.5,25,33,,Front,...,12.4,4,163,93,63,34,26.0,10.0,non-USA,Volkswagen Fox


In [59]:
pre1

In [60]:
# Apply the already-fitted preprocessor (no refitting) to the new rows.
xnew_pre = pre1.transform(X=xnew)
xnew_pre

Unnamed: 0,cat__Manufacturer_Acura,cat__Manufacturer_Audi,cat__Manufacturer_BMW,cat__Manufacturer_Buick,cat__Manufacturer_Cadillac,cat__Manufacturer_Chevrolet,cat__Manufacturer_Chrylser,cat__Manufacturer_Chrysler,cat__Manufacturer_Dodge,cat__Manufacturer_Eagle,...,cat__Origin_USA,cat__Origin_non-USA,con__Price,con__MPG.highway,con__Horsepower,con__RPM,con__Fuel.tank.capacity,con__Passengers,con__Length,con__Wheelbase
0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,1.893374,-0.581941,0.540813,0.369586,-0.510323,0.884457,0.467905,0.302785
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,-0.875337,0.360925,-0.649388,-0.135877,-0.449005,-0.083243,-0.151773,-0.434362
2,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,...,1.0,0.0,-0.37572,-0.016221,-0.649388,-0.135877,-0.050439,0.884457,1.018731,0.597644
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,1.352122,-0.770514,2.134145,2.054464,1.022624,-2.986345,-0.978011,-1.171509
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,-1.083511,0.738071,-1.206095,0.369586,-1.307455,-1.050944,-1.39113,-1.613797


In [61]:
# Predicted weights for the new rows.
preds = model.predict(X=xnew_pre)
preds

array([[3695.74554298],
       [2714.90553984],
       [3143.91102484],
       [2895.        ],
       [2281.07817625]])

Add the predicted values to the DataFrame and save it to a CSV file

In [62]:
# Attach the predictions as a new column; the (n, 1) prediction array is
# flattened to one value per row.
xnew["WeightPredicted"] = preds.ravel()
xnew

Unnamed: 0,Manufacturer,Model,Type,Min.Price,Price,Max.Price,MPG.city,MPG.highway,AirBags,DriveTrain,...,Passengers,Length,Wheelbase,Width,Turn.circle,Rear.seat.room,Luggage.room,Origin,Make,WeightPredicted
0,Audi,100,Midsize,30.8,37.7,44.6,19,26,,Front,...,6,190,106,65,37,31.0,17.0,non-USA,Audi 100,3695.745543
1,Pontiac,Sunbird,Compact,9.4,11.1,12.8,23,31,,Front,...,5,181,101,66,39,25.0,13.0,USA,Pontiac Sunbird,2714.90554
2,Chevrolet,Lumina,Midsize,13.4,15.9,18.4,21,29,,Front,...,6,198,108,71,40,28.5,16.0,USA,Chevrolet Lumina,3143.911025
3,Mazda,RX-7,Sporty,32.5,32.5,32.5,17,25,Driver only,Rear,...,2,169,96,69,37,,,non-USA,Mazda RX-7,2895.0
4,Volkswagen,Fox,Small,8.7,9.1,9.5,25,33,,Front,...,4,163,93,63,34,26.0,10.0,non-USA,Volkswagen Fox,2281.078176


In [63]:
# Write the new rows plus their predictions to disk, without the index column.
xnew.to_csv(path_or_buf="BackwardSelectionResults.csv", index=False)