In [1]:
import pandas as pd
import numpy as np

In [2]:
df=pd.read_csv('gurgaon_properties_post_feature_selection_v2.csv')

In [3]:
df.head()

Unnamed: 0,property_type,sector,price,bedRoom,bathroom,balcony,agePossession,built_up_area,servant room,store room,furnishing_type,luxury_category,floor_category
0,flat,sector 36,0.82,3.0,2.0,2,New Property,850.0,0.0,0.0,0.0,Low,Low Floor
1,flat,sector 89,0.95,2.0,2.0,2,New Property,1226.0,1.0,0.0,0.0,Low,Mid Floor
2,flat,sohna road,0.32,2.0,2.0,1,New Property,1000.0,0.0,0.0,0.0,Low,High Floor
3,flat,sector 92,1.6,3.0,4.0,3+,Relatively New,1615.0,1.0,0.0,1.0,High,Mid Floor
4,flat,sector 102,0.48,2.0,2.0,1,Relatively New,582.0,0.0,1.0,0.0,High,Mid Floor


In [4]:
df['furnishing_type'].value_counts()

0.0    2349
1.0    1018
2.0     187
Name: furnishing_type, dtype: int64

In [5]:
#0-unfurnised
#1-semi
#2 fully

In [6]:
df['furnishing_type']=df['furnishing_type'].replace({0.0:'unfurnised',1.0:'semifurnised',2.0:'furnised'})

In [8]:
df.head()

Unnamed: 0,property_type,sector,price,bedRoom,bathroom,balcony,agePossession,built_up_area,servant room,store room,furnishing_type,luxury_category,floor_category
0,flat,sector 36,0.82,3.0,2.0,2,New Property,850.0,0.0,0.0,unfurnised,Low,Low Floor
1,flat,sector 89,0.95,2.0,2.0,2,New Property,1226.0,1.0,0.0,unfurnised,Low,Mid Floor
2,flat,sohna road,0.32,2.0,2.0,1,New Property,1000.0,0.0,0.0,unfurnised,Low,High Floor
3,flat,sector 92,1.6,3.0,4.0,3+,Relatively New,1615.0,1.0,0.0,semifurnised,High,Mid Floor
4,flat,sector 102,0.48,2.0,2.0,1,Relatively New,582.0,0.0,1.0,unfurnised,High,Mid Floor


In [9]:
X=df.drop(columns=['price'])
y=df['price']

In [10]:
#since our output col is right skewed so to make it normalize i'll apply log-tranform

In [11]:
# Applying the log1p transformation to the target variable
y_transformed = np.log1p(y)

In [12]:
#i willuse 3 types of encoding and will find the r2 score and then will consider the encoding based on best r2score

### 1.Ordinal Encoding

In [13]:
columns_to_encode = ['property_type','sector', 'balcony', 'agePossession', 'furnishing_type', 'luxury_category', 'floor_category']

In [16]:
# Creating a column transformer for preprocessing
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler,OrdinalEncoder

preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), ['bedRoom', 'bathroom', 'built_up_area', 'servant room', 'store room']),
        ('cat', OrdinalEncoder(), columns_to_encode)
    ], 
    remainder='passthrough'
)

In [18]:
# Creating a pipeline
from sklearn.pipeline import Pipeline
from sklearn.linear_model  import LinearRegression
pipeline = Pipeline([
    ('preprocessor', preprocessor),
    ('regressor', LinearRegression())
])

In [29]:
from sklearn.model_selection import cross_val_score,KFold

# K-fold cross-validation
kfold = KFold(n_splits=10, shuffle=True, random_state=42)
scores = cross_val_score(pipeline, X, y_transformed, cv=kfold, scoring='r2')

In [30]:
scores.mean(),scores.std()

(0.7363096633436828, 0.03238005754429937)

In [31]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X,y_transformed,test_size=0.2,random_state=42)

In [32]:
pipeline.fit(X_train,y_train)

In [33]:
y_pred = pipeline.predict(X_test)

In [34]:
y_pred = np.expm1(y_pred)

In [35]:
from sklearn.metrics import mean_absolute_error
mean_absolute_error(np.expm1(y_test),y_pred)

0.9463822160089357

In [62]:
#crating a function to do the complete thing that i did above

In [36]:
def scorer(model_name,model):
    output=[]
    output.append(model_name)
    
    pipeline = Pipeline([
        ('preprocessor', preprocessor),
        ('regressor', model)
    ])
    
    # K-fold cross-validation
    kfold = KFold(n_splits=10, shuffle=True, random_state=42)
    scores = cross_val_score(pipeline, X, y_transformed, cv=kfold, scoring='r2')
    
    output.append(scores.mean())
    
    X_train, X_test, y_train, y_test = train_test_split(X,y_transformed,test_size=0.2,random_state=42)
    pipeline.fit(X_train,y_train)
    
    y_pred = pipeline.predict(X_test)
    y_pred = np.expm1(y_pred)
    
    output.append(mean_absolute_error(np.expm1(y_test),y_pred))
    
    return output

In [56]:
from sklearn.svm import SVR
from sklearn.linear_model import Ridge
from sklearn.linear_model import Lasso
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import ExtraTreesRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.ensemble import AdaBoostRegressor
from xgboost import XGBRegressor
from sklearn.neural_network import MLPRegressor


In [57]:
model_dict = {
    'linear_reg':LinearRegression(),
    'svr':SVR(),
    'ridge':Ridge(),
    'LASSO':Lasso(),
    'decision tree': DecisionTreeRegressor(),
    'random forest':RandomForestRegressor(),
    'extra trees': ExtraTreesRegressor(),
    'gradient boosting': GradientBoostingRegressor(),
    'adaboost': AdaBoostRegressor(),
    'mlp': MLPRegressor(),
    'xgboost':XGBRegressor()
}

In [58]:
model_output=[]
for model_name,model in model_dict.items():
    model_output.append(scorer(model_name,model))
    

In [59]:
model_output

[['linear_reg', 0.7363096633436828, 0.9463822160089357],
 ['svr', 0.7642012011196353, 0.8472636473483922],
 ['ridge', 0.7363125343993552, 0.9463387741853366],
 ['LASSO', 0.05943378064493573, 1.528905986892753],
 ['decision tree', 0.7777182652177298, 0.7122144264407027],
 ['random forest', 0.880367246089493, 0.5304496916204123],
 ['extra trees', 0.8671796603007136, 0.5531919582271094],
 ['gradient boosting', 0.8725719794529334, 0.5760267791484706],
 ['adaboost', 0.7516011143658858, 0.8112611504581413],
 ['mlp', 0.8092673296437388, 0.7002192772765337],
 ['xgboost', 0.8894876835260124, 0.5040475141482346]]

In [60]:
model_df=pd.DataFrame(model_output,columns=['name','r2','mae'])

In [61]:
model_df.sort_values(['mae'])

Unnamed: 0,name,r2,mae
10,xgboost,0.889488,0.504048
5,random forest,0.880367,0.53045
6,extra trees,0.86718,0.553192
7,gradient boosting,0.872572,0.576027
9,mlp,0.809267,0.700219
4,decision tree,0.777718,0.712214
8,adaboost,0.751601,0.811261
1,svr,0.764201,0.847264
2,ridge,0.736313,0.946339
0,linear_reg,0.73631,0.946382


### 2.One Hot Encoding

In [64]:
# Creating a column transformer for preprocessing
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler,OrdinalEncoder,OneHotEncoder

preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), ['bedRoom', 'bathroom', 'built_up_area', 'servant room', 'store room']),
        ('cat', OrdinalEncoder(), columns_to_encode),
        ('cat1',OneHotEncoder(drop='first'),['sector','agePossession', 'furnishing_type'])
    ], 
    remainder='passthrough'
)

In [65]:
# Creating a pipeline
from sklearn.pipeline import Pipeline
from sklearn.linear_model  import LinearRegression
pipeline = Pipeline([
    ('preprocessor', preprocessor),
    ('regressor', LinearRegression())
])

In [66]:
kfold=KFold(n_splits=10,shuffle=True,random_state=42)
scores=cross_val_score(pipeline,X,y_transformed,cv=kfold,scoring='r2')

In [67]:
scores.mean()

0.8546151354698773

In [68]:
scores.std()

0.01599244635272164

In [70]:
X_train,X_test,y_train,y_test=train_test_split(X,y_transformed,test_size=0.2,random_state=42)

pipeline.fit(X_train,y_train)


In [71]:
y_pred=pipeline.predict(X_test)

y_pred=np.expm1(y_pred)

In [72]:
mean_absolute_error(np.expm1(y_test),y_pred)

0.6496725232139183

In [73]:
def scorer(model_name,model):
    output=[]
    output.append(model_name)
    
    pipeline = Pipeline([
        ('preprocessor', preprocessor),
        ('regressor', model)
    ])
    
    # K-fold cross-validation
    kfold = KFold(n_splits=10, shuffle=True, random_state=42)
    scores = cross_val_score(pipeline, X, y_transformed, cv=kfold, scoring='r2')
    
    output.append(scores.mean())
    
    X_train, X_test, y_train, y_test = train_test_split(X,y_transformed,test_size=0.2,random_state=42)
    pipeline.fit(X_train,y_train)
    
    y_pred = pipeline.predict(X_test)
    y_pred = np.expm1(y_pred)
    
    output.append(mean_absolute_error(np.expm1(y_test),y_pred))
    
    return output

In [74]:
model_dict = {
    'linear_reg':LinearRegression(),
    'svr':SVR(),
    'ridge':Ridge(),
    'LASSO':Lasso(),
    'decision tree': DecisionTreeRegressor(),
    'random forest':RandomForestRegressor(),
    'extra trees': ExtraTreesRegressor(),
    'gradient boosting': GradientBoostingRegressor(),
    'adaboost': AdaBoostRegressor(),
    'mlp': MLPRegressor(),
    'xgboost':XGBRegressor()
}

In [75]:
model_output=[]
for model_name,model in model_dict.items():
    model_output.append(scorer(model_name,model))

In [76]:
model_output

[['linear_reg', 0.8546151354698773, 0.6496725232139183],
 ['svr', 0.7697413260547326, 0.8341243500492146],
 ['ridge', 0.8546784656319515, 0.6528940620586247],
 ['LASSO', 0.05943378064493578, 1.528905986892753],
 ['decision tree', 0.8056470542430321, 0.6968633966044269],
 ['random forest', 0.890355678478653, 0.49318632975189136],
 ['extra trees', 0.8953738089807409, 0.4739449646988937],
 ['gradient boosting', 0.8766799144510115, 0.5685812458005489],
 ['adaboost', 0.7568790255176207, 0.834845544715943],
 ['mlp', 0.870506879586873, 0.5588416195034289],
 ['xgboost', 0.8958499681743852, 0.4934562667923469]]

In [77]:
model_df=pd.DataFrame(model_output,columns=['name','r2','mae'])

In [80]:
model_df.sort_values(['mae'])

Unnamed: 0,name,r2,mae
6,extra trees,0.895374,0.473945
5,random forest,0.890356,0.493186
10,xgboost,0.89585,0.493456
9,mlp,0.870507,0.558842
7,gradient boosting,0.87668,0.568581
0,linear_reg,0.854615,0.649673
2,ridge,0.854678,0.652894
4,decision tree,0.805647,0.696863
1,svr,0.769741,0.834124
8,adaboost,0.756879,0.834846


### OneHotEncoding With PCA

In [81]:
# Creating a column transformer for preprocessing
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler,OrdinalEncoder,OneHotEncoder

preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), ['bedRoom', 'bathroom', 'built_up_area', 'servant room', 'store room']),
        ('cat', OrdinalEncoder(), columns_to_encode),
        ('cat1',OneHotEncoder(drop='first',sparse_output=False),['sector','agePossession'])
    ], 
    remainder='passthrough'
)

In [84]:
# Creating a pipeline
from sklearn.pipeline import Pipeline
from sklearn.linear_model  import LinearRegression
from sklearn.decomposition import PCA
pipeline = Pipeline([
    ('preprocessor', preprocessor),
    ('pca',PCA(n_components=0.95)),
    ('regressor', LinearRegression())
])

# K-fold cross-validation
kfold = KFold(n_splits=10, shuffle=True, random_state=42)
scores = cross_val_score(pipeline, X, y_transformed, cv=kfold, scoring='r2')

scores.mean(),scores.std()

(0.06225201431451136, 0.01986059407164015)

In [85]:
def scorer(model_name, model):
    
    output = []
    
    output.append(model_name)
    
    pipeline = Pipeline([
        ('preprocessor', preprocessor),
        ('pca', PCA(n_components=0.95)),
        ('regressor', model)
    ])
    
    # K-fold cross-validation
    kfold = KFold(n_splits=10, shuffle=True, random_state=42)
    scores = cross_val_score(pipeline, X, y_transformed, cv=kfold, scoring='r2')
    
    output.append(scores.mean())
    
    X_train, X_test, y_train, y_test = train_test_split(X,y_transformed,test_size=0.2,random_state=42)
    
    pipeline.fit(X_train,y_train)
    
    y_pred = pipeline.predict(X_test)
    
    y_pred = np.expm1(y_pred)
    
    output.append(mean_absolute_error(np.expm1(y_test),y_pred))
    
    return output
    


In [86]:
model_dict = {
    'linear_reg':LinearRegression(),
    'svr':SVR(),
    'ridge':Ridge(),
    'LASSO':Lasso(),
    'decision tree': DecisionTreeRegressor(),
    'random forest':RandomForestRegressor(),
    'extra trees': ExtraTreesRegressor(),
    'gradient boosting': GradientBoostingRegressor(),
    'adaboost': AdaBoostRegressor(),
    'mlp': MLPRegressor(),
    'xgboost':XGBRegressor()
}

In [87]:
model_output = []
for model_name,model in model_dict.items():
    model_output.append(scorer(model_name, model))

In [88]:
model_df = pd.DataFrame(model_output, columns=['name','r2','mae'])
model_df.sort_values(['mae'])

Unnamed: 0,name,r2,mae
5,random forest,0.761414,0.648081
6,extra trees,0.73972,0.700358
4,decision tree,0.696182,0.75729
10,xgboost,0.620664,0.948597
7,gradient boosting,0.610604,0.987906
1,svr,0.218073,1.361163
8,adaboost,0.313441,1.38697
9,mlp,0.206832,1.407282
2,ridge,0.062252,1.526707
0,linear_reg,0.062252,1.526707


### 3.Target Encoder

In [93]:
import category_encoders as ce

columns_to_encode = ['property_type','sector', 'balcony', 'agePossession', 'furnishing_type', 'luxury_category', 'floor_category']

# Creating a column transformer for preprocessing
preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), ['bedRoom', 'bathroom', 'built_up_area', 'servant room', 'store room']),
        ('cat', OrdinalEncoder(), columns_to_encode),
        ('cat1',OneHotEncoder(drop='first',sparse_output=False),['agePossession']),
        ('target_enc', ce.TargetEncoder(), ['sector'])
    ], 
    remainder='passthrough'
)

In [94]:
# Creating a pipeline
pipeline = Pipeline([
    ('preprocessor', preprocessor),
    ('regressor', LinearRegression())
])

In [95]:
kfold = KFold(n_splits=10, shuffle=True, random_state=42)
scores = cross_val_score(pipeline, X, y_transformed, cv=kfold, scoring='r2')

scores.mean()

0.8295219182255362

In [96]:
def scorer(model_name, model):
    
    output = []
    
    output.append(model_name)
    
    pipeline = Pipeline([
        ('preprocessor', preprocessor),
        ('regressor', model)
    ])
    
    # K-fold cross-validation
    kfold = KFold(n_splits=10, shuffle=True, random_state=42)
    scores = cross_val_score(pipeline, X, y_transformed, cv=kfold, scoring='r2')
    
    output.append(scores.mean())
    
    X_train, X_test, y_train, y_test = train_test_split(X,y_transformed,test_size=0.2,random_state=42)
    
    pipeline.fit(X_train,y_train)
    
    y_pred = pipeline.predict(X_test)
    
    y_pred = np.expm1(y_pred)
    
    output.append(mean_absolute_error(np.expm1(y_test),y_pred))
    
    return output

In [97]:
model_dict = {
    'linear_reg':LinearRegression(),
    'svr':SVR(),
    'ridge':Ridge(),
    'LASSO':Lasso(),
    'decision tree': DecisionTreeRegressor(),
    'random forest':RandomForestRegressor(),
    'extra trees': ExtraTreesRegressor(),
    'gradient boosting': GradientBoostingRegressor(),
    'adaboost': AdaBoostRegressor(),
    'mlp': MLPRegressor(),
    'xgboost':XGBRegressor()
}

In [98]:
model_output = []
for model_name,model in model_dict.items():
    model_output.append(scorer(model_name, model))

In [100]:
model_df = pd.DataFrame(model_output, columns=['name','r2','mae'])
model_df.sort_values(['mae'])

Unnamed: 0,name,r2,mae
10,xgboost,0.904798,0.447518
5,random forest,0.899971,0.450384
6,extra trees,0.902228,0.457601
7,gradient boosting,0.888899,0.510822
4,decision tree,0.828218,0.556053
9,mlp,0.851335,0.601242
0,linear_reg,0.829522,0.713011
2,ridge,0.829536,0.713523
8,adaboost,0.818571,0.714767
1,svr,0.782917,0.818851


### Hyperparameter Tuning

In [101]:
from sklearn.model_selection import GridSearchCV

In [119]:
param_grid = {
    'regressor__n_estimators': [50, 100, 200, 300],
    'regressor__max_depth': [None, 10, 20, 30],
    'regressor__max_samples':[0.1, 0.25, 0.5, 1.0],
    'regressor__max_features': ['auto', 1.0]
}

In [120]:
columns_to_encode = ['property_type','sector', 'balcony', 'agePossession', 'furnishing_type', 'luxury_category', 'floor_category']

# Creating a column transformer for preprocessing
preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), ['bedRoom', 'bathroom', 'built_up_area', 'servant room', 'store room']),
        ('cat', OrdinalEncoder(), columns_to_encode),
        ('cat1',OneHotEncoder(drop='first',sparse_output=False),['agePossession']),
        ('target_enc', ce.TargetEncoder(), ['sector'])
    ], 
    remainder='passthrough'
)

In [121]:
pipeline = Pipeline([
    ('preprocessor', preprocessor),
    ('regressor', RandomForestRegressor())
])

kfold = KFold(n_splits=10, shuffle=True, random_state=42)

search = GridSearchCV(pipeline, param_grid, cv=kfold, scoring='r2', n_jobs=-1, verbose=4)


In [122]:
search.fit(X, y_transformed)

Fitting 10 folds for each of 128 candidates, totalling 1280 fits


640 fits failed out of a total of 1280.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
306 fits failed with the following error:
Traceback (most recent call last):
  File "C:\Users\Lucky Singh\AppData\Local\Programs\Python\Python310\lib\site-packages\sklearn\model_selection\_validation.py", line 729, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Lucky Singh\AppData\Local\Programs\Python\Python310\lib\site-packages\sklearn\base.py", line 1152, in wrapper
    return fit_method(estimator, *args, **kwargs)
  File "C:\Users\Lucky Singh\AppData\Local\Programs\Python\Python310\lib\site-packages\sklearn\pipeline.py", line 427, in fit
    self._final_estimator.fit(Xt, y, **fit_params_last_step)
  File "C:\U

In [123]:
final_pipe = search.best_estimator_

In [124]:
search.best_params_

{'regressor__max_depth': 30,
 'regressor__max_features': 1.0,
 'regressor__max_samples': 1.0,
 'regressor__n_estimators': 200}

In [125]:
search.best_score_

0.9026174852286127

In [129]:
final_pipe.fit(X,y_transformed)

### Exporting the model

In [131]:
preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), ['bedRoom', 'bathroom', 'built_up_area', 'servant room', 'store room']),
        ('cat', OrdinalEncoder(), columns_to_encode),
        ('cat1',OneHotEncoder(drop='first',sparse_output=False),['sector','agePossession'])
    ], 
    remainder='passthrough'
)

In [132]:
pipeline = Pipeline([
    ('preprocessor', preprocessor),
    ('regressor', RandomForestRegressor(n_estimators=500))
])

In [133]:
pipeline.fit(X,y_transformed)

In [113]:
import pickle

with open('pipeline.pkl', 'wb') as file:
    pickle.dump(pipeline, file)

In [114]:
with open('df.pkl', 'wb') as file:
    pickle.dump(X, file)

In [115]:
X

Unnamed: 0,property_type,sector,bedRoom,bathroom,balcony,agePossession,built_up_area,servant room,store room,furnishing_type,luxury_category,floor_category
0,flat,sector 36,3.0,2.0,2,New Property,850.0,0.0,0.0,unfurnised,Low,Low Floor
1,flat,sector 89,2.0,2.0,2,New Property,1226.0,1.0,0.0,unfurnised,Low,Mid Floor
2,flat,sohna road,2.0,2.0,1,New Property,1000.0,0.0,0.0,unfurnised,Low,High Floor
3,flat,sector 92,3.0,4.0,3+,Relatively New,1615.0,1.0,0.0,semifurnised,High,Mid Floor
4,flat,sector 102,2.0,2.0,1,Relatively New,582.0,0.0,1.0,unfurnised,High,Mid Floor
...,...,...,...,...,...,...,...,...,...,...,...,...
3549,flat,sector 84,2.0,2.0,1,Relatively New,532.0,0.0,0.0,unfurnised,Medium,Mid Floor
3550,house,sector 109,5.0,5.0,3+,Relatively New,6228.0,1.0,1.0,unfurnised,High,Low Floor
3551,flat,sector 2,1.0,1.0,1,Moderately Old,665.0,0.0,0.0,semifurnised,Medium,Mid Floor
3552,house,sector 43,5.0,6.0,3,Moderately Old,5490.0,1.0,1.0,unfurnised,Medium,Mid Floor


### Predictions

In [116]:
X.columns

Index(['property_type', 'sector', 'bedRoom', 'bathroom', 'balcony',
       'agePossession', 'built_up_area', 'servant room', 'store room',
       'furnishing_type', 'luxury_category', 'floor_category'],
      dtype='object')

In [118]:
X.iloc[0].values

array(['flat', 'sector 36', 3.0, 2.0, '2', 'New Property', 850.0, 0.0,
       0.0, 'unfurnised', 'Low', 'Low Floor'], dtype=object)

In [142]:
data = [['house', 'sector 49', 3, 3, '3+', 'New Property', 1750, 0, 0, 'unfurnised', 'Low', 'Low Floor']]
columns = ['property_type', 'sector', 'bedRoom', 'bathroom', 'balcony',
       'agePossession', 'built_up_area', 'servant room', 'store room',
       'furnishing_type', 'luxury_category', 'floor_category']

# Convert to DataFrame
one_df = pd.DataFrame(data, columns=columns)

one_df

Unnamed: 0,property_type,sector,bedRoom,bathroom,balcony,agePossession,built_up_area,servant room,store room,furnishing_type,luxury_category,floor_category
0,house,sector 49,3,3,3+,New Property,1750,0,0,unfurnised,Low,Low Floor


In [143]:
np.expm1(pipeline.predict(one_df))

array([2.75437105])

In [146]:

!pip show numpy

Name: numpy
Version: 1.22.4
Summary: NumPy is the fundamental package for array computing with Python.
Home-page: https://www.numpy.org
Author: Travis E. Oliphant et al.
Author-email: 
License: BSD
Location: c:\users\lucky singh\appdata\local\programs\python\python310\lib\site-packages
Requires: 
Required-by: altair, category-encoders, folium, ImageHash, matplotlib, mlxtend, numba, pandas, pandasgui, patsy, phik, pyarrow, pydeck, PyWavelets, scikit-learn, scipy, seaborn, shap, statsmodels, streamlit, transformers, visions, wordcloud, xgboost, ydata-profiling


In [147]:
!pip install pandas




[notice] A new release of pip is available: 23.2.1 -> 23.3.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [148]:
!pip show pandas

Name: pandas
Version: 1.3.5
Summary: Powerful data structures for data analysis, time series, and statistics
Home-page: https://pandas.pydata.org
Author: The Pandas Development Team
Author-email: pandas-dev@python.org
License: BSD-3-Clause
Location: c:\users\lucky singh\appdata\local\programs\python\python310\lib\site-packages
Requires: numpy, python-dateutil, pytz
Required-by: altair, category-encoders, mlxtend, pandas-datareader, pandasgui, phik, seaborn, shap, statsmodels, streamlit, visions, ydata-profiling


In [149]:
!pip install --upgrade pandas==2.0


Collecting pandas==2.0

ERROR: Could not install packages due to an OSError: [WinError 5] Access is denied: 'C:\\Users\\Lucky Singh\\AppData\\Local\\Programs\\Python\\Python310\\Lib\\site-packages\\~andas\\_libs\\algos.cp310-win_amd64.pyd'
Consider using the `--user` option or check the permissions.


[notice] A new release of pip is available: 23.2.1 -> 23.3.1
[notice] To update, run: python.exe -m pip install --upgrade pip



  Downloading pandas-2.0.0-cp310-cp310-win_amd64.whl (11.2 MB)
     ---------------------------------------- 11.2/11.2 MB 2.0 MB/s eta 0:00:00
Installing collected packages: pandas
  Attempting uninstall: pandas
    Found existing installation: pandas 1.3.5
    Uninstalling pandas-1.3.5:
      Successfully uninstalled pandas-1.3.5


In [151]:
!pip install pickle

ERROR: Could not find a version that satisfies the requirement pickle (from versions: none)
ERROR: No matching distribution found for pickle

[notice] A new release of pip is available: 23.2.1 -> 23.3.1
[notice] To update, run: python.exe -m pip install --upgrade pip
