#IMPORTING LIBRARIES

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, f1_score
from sklearn.model_selection import cross_val_score, KFold
from sklearn.model_selection import GridSearchCV


- READING CSV FILES

In [None]:
import pandas as pd
dash = pd.read_csv('kijiji.csv')
df_1= pd.read_csv('kijiji.csv')
df_2 = pd.read_csv('CSDpop.csv')

In [None]:
df_1.columns

Index(['CSDUID', 'CSDNAME', 'Latitude', 'Longitude', 'adId', 'Title', 'Type',
       'Price', 'Location', 'Bedrooms', 'Bathrooms', 'Hydro', 'Heat', 'Water',
       'Size', 'Agreement Type', 'URL', 'Date Posted'],
      dtype='object')

In [None]:
df_1 = df_1.drop(['CSDNAME','Longitude', 'Location', 'Title','adId','URL','Date Posted'],axis=1)

In [None]:
df_1['Size'] = pd.to_numeric(df_1['Size'], errors='coerce' )

-  This code is setting any "Size" values in the DataFrame df_1 that are less than 200 or greater than 7000 to NaN

In [None]:
import numpy as np
for index, row in df_1.iterrows ():
   if row["Size"] < 200 or row["Size"] > 7000:
      df_1.at [index, 'Size'] = np. nan

- FILLING NULL VALUES WITH MEAN IN SIZE COLUMN.

In [None]:
df_1['Size'].fillna(value=df_1.Size.mean(), inplace=True)

In [None]:
df_1['Price'] = df_1['Price'].str.replace('$','')
df_1['Price'] = df_1['Price'].str.replace(',','')
df_1['Price'] = pd.to_numeric(df_1['Price'], errors='coerce' )

In [None]:
merged_df_1 = df_1.merge(df_2, on ='CSDUID', how= 'left')

In [None]:
merged_df_1.columns

Index(['CSDUID', 'Latitude', 'Type', 'Price', 'Bedrooms', 'Bathrooms', 'Hydro',
       'Heat', 'Water', 'Size', 'Agreement Type', 'Population'],
      dtype='object')

In [None]:
merged_df_1 = merged_df_1.drop(['CSDUID'],axis=1)

In [None]:
merged_df_1['Type'].value_counts()

Type
Apartment         9685
House             5526
Basement          3689
Condo             3419
Townhouse         2399
Duplex/Triplex    1014
Name: count, dtype: int64

- This code categorizes the prices in the "Price" column of merged_df_1 into three categories ("low", "Medium", "High") based on the defined conditions, and stores the result in a new column "Price_Category".

In [None]:
def priceConvert(p):
  if p <= 1400:
    return "low"
  elif p <= 2000 and p >= 1400:
    return "Medium"
  elif p > 2000:
    return "High"

merged_df_1["Price_Category"] = merged_df_1["Price"].apply(priceConvert)

In [None]:
merged_df_1['Price_Category'].head()

0    Medium
1       low
2      High
3       low
4      High
Name: Price_Category, dtype: object

In [None]:
merged_df_1.head()

Unnamed: 0,Latitude,Type,Price,Bedrooms,Bathrooms,Hydro,Heat,Water,Size,Agreement Type,Population,Price_Category
0,45.256161,Apartment,1425.0,Bedrooms: 2,Bathrooms: 1,N,Y,Y,950.0,1 Year,14211,Medium
1,45.192003,Apartment,1100.0,Bedrooms: 2,Bathrooms: 1,N,N,Y,780.0,1 Year,14211,low
2,45.146672,House,2700.0,Bedrooms: 3,Bathrooms: 2.5,N,Y,Y,693.228433,1 Year,14211,High
3,45.143726,House,1200.0,Bedrooms: 2,Bathrooms: 1.5,N,N,Y,900.0,1 Year,14211,low
4,45.068973,House,4000.0,Bedrooms: 2 + Den,Bathrooms: 3,N,Y,Y,693.228433,1 Year,14211,High


In [None]:
merged_df_1['Type'] = merged_df_1['Type'].astype('category').cat.codes

- This process transforms categorical columns into a format that machine learning algorithms can better understand, where each category is represented by a binary (0 or 1) value in its own column.

In [None]:
one_hot_encoded1 = pd.get_dummies(merged_df_1['Type'], prefix='PropType')
one_hot_encoded2 = pd.get_dummies(merged_df_1['Bedrooms'], prefix='Bedrooms')
one_hot_encoded3 = pd.get_dummies(merged_df_1['Bathrooms'], prefix='Bathrooms')
one_hot_encoded4 = pd. get_dummies(merged_df_1['Hydro'], prefix='Hydro')
one_hot_encoded5 = pd. get_dummies(merged_df_1['Heat'], prefix='Heat')
one_hot_encoded6 = pd. get_dummies(merged_df_1['Water'], prefix='Water')
one_hot_encoded7 = pd. get_dummies(merged_df_1['Agreement Type'], prefix='Agreement')


#Concatenate the one-hot encoded columns with the original Dataframe
df_encoded = pd.concat([merged_df_1, one_hot_encoded1,one_hot_encoded2, one_hot_encoded3, one_hot_encoded4, one_hot_encoded5, one_hot_encoded6,one_hot_encoded7], axis=1)

In [None]:
df_encoded = df_encoded.drop(['Type','Bedrooms','Bathrooms','Hydro','Heat','Water','Agreement Type'], axis=1)

In [None]:
df_encoded.head()

Unnamed: 0,Latitude,Price,Size,Population,Price_Category,PropType_0,PropType_1,PropType_2,PropType_3,PropType_4,...,Bathrooms_Bathrooms: 6+,Hydro_N,Hydro_Y,Heat_N,Heat_Y,Water_N,Water_Y,Agreement_1 Year,Agreement_Month-to-month,Agreement_Not Available
0,45.256161,1425.0,950.0,14211,Medium,True,False,False,False,False,...,False,True,False,False,True,False,True,True,False,False
1,45.192003,1100.0,780.0,14211,low,True,False,False,False,False,...,False,True,False,True,False,False,True,True,False,False
2,45.146672,2700.0,693.228433,14211,High,False,False,False,False,True,...,False,True,False,False,True,False,True,True,False,False
3,45.143726,1200.0,900.0,14211,low,False,False,False,False,True,...,False,True,False,True,False,False,True,True,False,False
4,45.068973,4000.0,693.228433,14211,High,False,False,False,False,True,...,False,True,False,False,True,False,True,True,False,False


In [None]:
y = df_encoded["Price_Category"]
X = df_encoded.drop(["Price_Category"], axis=1)

In [None]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# A scaler: StandardScaler()
scaler = StandardScaler()
# Fit the scaler to the training data and transform it
X_train = scaler.fit_transform(X_train)

# Transform the testing data using the fitted scaler
X_test = scaler.transform(X_test)

In [None]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
# Fit the scaler to the training data and transform it
X_train = scaler.fit_transform(X_train)

# Transform the testing data using the fitted scaler
X_test = scaler.transform(X_test)

In [None]:
X_scaled = X

In [None]:
y.dtypes

dtype('O')

# This code trains a logistic regression model, evaluates its performance using accuracy, and provides a detailed classification report containing additional performance metrics.







In [None]:
from sklearn.metrics import classification_report
logreg = LogisticRegression(max_iter=1000)
logreg.fit(X_train, y_train)
y_pred = logreg.predict(X_test)
accuracylg = accuracy_score(y_test, y_pred)
print("Logistic Regression :", accuracylg)
report = classification_report(y_test, y_pred)
print("Classification Report:", report)

Logistic Regression : 0.9953370895667379
Classification Report:               precision    recall  f1-score   support

        High       1.00      0.99      1.00      2559
      Medium       0.99      1.00      0.99      1868
         low       0.99      1.00      1.00       720

    accuracy                           1.00      5147
   macro avg       0.99      1.00      1.00      5147
weighted avg       1.00      1.00      1.00      5147



- This code assesses the performance of the logistic regression model using k-fold cross-validation, where the data is split into 5 folds, shuffled, and the model is trained and evaluated on each fold

In [None]:
kf1 = KFold(n_splits=9, shuffle=True, random_state=42)
cv_scores_kf1 = cross_val_score(logreg, X_scaled, y, cv=kf1)
print("Cross-validation scores (KFold 1):", np.mean(cv_scores_kf1))

Cross-validation scores (KFold 1): 0.8287353585359881


In [None]:
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# Define a smaller parameter grid for hyperparameter tuning
param_grid = {
    'C': [0.1, 1, 10],          # Reduced regularization parameter options
    'penalty': ['l1', 'l2'],    # Penalty term
    'solver': ['liblinear']     # Use only 'liblinear' solver
}

# Perform grid search with cross-validation
grid_search = GridSearchCV(LogisticRegression(max_iter=1000), param_grid, cv=None, scoring='accuracy')
grid_search.fit(X_scaled, y)

# Get the best logistic regression model
best_logreg_model = grid_search.best_estimator_

# Predict on the test set
y_pred = best_logreg_model.predict(X_test)

# Calculate accuracy
accuracy_lgscv = accuracy_score(y_test, y_pred)

# Print the best parameters found
print("Best Parameters:", grid_search.best_params_)

# Get the mean cross-validation score for the best model
print("Mean Cross-validation Score:", grid_search.best_score_)


Best Parameters: {'C': 1, 'penalty': 'l1', 'solver': 'liblinear'}
Mean Cross-validation Score: 0.9762555904975153




In [None]:

from sklearn.neighbors import KNeighborsClassifier

# Create KNN classifier
knn = KNeighborsClassifier()

# Fit the model
knn.fit(X_train, y_train)

# Predict on the test set
y_pred_knn = knn.predict(X_test)

# Calculate accuracy
accuracy_knn = accuracy_score(y_test, y_pred_knn)

# Print accuracy
print("KNN Accuracy:", accuracy_knn)

# Generate classification report
report_knn = classification_report(y_test, y_pred_knn)
print("KNN Classification Report:")
print(report_knn)


KNN Accuracy: 0.8612784146104527
KNN Classification Report:
              precision    recall  f1-score   support

        High       0.91      0.93      0.92      2559
      Medium       0.81      0.83      0.82      1868
         low       0.83      0.71      0.76       720

    accuracy                           0.86      5147
   macro avg       0.85      0.82      0.83      5147
weighted avg       0.86      0.86      0.86      5147



In [None]:
from sklearn.model_selection import cross_val_score, KFold
from sklearn.neighbors import KNeighborsClassifier

# Define KNN classifier
knn = KNeighborsClassifier()

# Initialize KFold cross-validator
kf = KFold(n_splits=5, shuffle=True, random_state=42)

# Perform cross-validation
cv_scores_knn = cross_val_score(knn, X_scaled, y, cv=kf)

# Print mean cross-validation score
print("Mean Cross-validation Score (KNN):", np.mean(cv_scores_knn))


Mean Cross-validation Score (KNN): 0.9865925165845102


In [None]:
from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

# Define the parameter grid for KNN hyperparameter tuning
param_grid_knn = {
    'n_neighbors': [3, 5, 7],   # Number of neighbors
    'weights': ['uniform', 'distance'],  # Weight function
    'algorithm': ['auto', 'ball_tree', 'kd_tree', 'brute'],  # Algorithm to compute nearest neighbors
}

# Perform grid search with cross-validation for KNN
grid_search_knn = GridSearchCV(KNeighborsClassifier(), param_grid_knn, cv=None, scoring='accuracy')
grid_search_knn.fit(X_scaled, y)

# Get the best KNN model
best_knn_model = grid_search_knn.best_estimator_

# Predict on the test set
y_pred_knn = best_knn_model.predict(X_test)

# Calculate accuracy
accuracy_knn = accuracy_score(y_test, y_pred_knn)

# Print the best parameters found for KNN
print("Best Parameters (KNN):", grid_search_knn.best_params_)

# Get the mean cross-vali




Best Parameters (KNN): {'algorithm': 'auto', 'n_neighbors': 7, 'weights': 'uniform'}


In [None]:
from sklearn.tree import DecisionTreeClassifier

# Create Decision Tree classifier
dt = DecisionTreeClassifier()

# Fit the model
dt.fit(X_train, y_train)

# Predict on the test set
y_pred_dt = dt.predict(X_test)

# Calculate accuracy
accuracy_dt = accuracy_score(y_test, y_pred_dt)

# Print accuracy
print("Decision Tree Accuracy:", accuracy_dt)

# Generate classification report
report_dt = classification_report(y_test, y_pred_dt)
print("Decision Tree Classification Report:")
print(report_dt)


Decision Tree Accuracy: 1.0
Decision Tree Classification Report:
              precision    recall  f1-score   support

        High       1.00      1.00      1.00      2559
      Medium       1.00      1.00      1.00      1868
         low       1.00      1.00      1.00       720

    accuracy                           1.00      5147
   macro avg       1.00      1.00      1.00      5147
weighted avg       1.00      1.00      1.00      5147



In [None]:
from sklearn.tree import DecisionTreeClassifier

# Define Decision Tree classifier
dt = DecisionTreeClassifier()

# Perform cross-validation
cv_scores_dt = cross_val_score(dt, X_scaled, y, cv=kf)

# Print mean cross-validation score
print("Mean Cross-validation Score (Decision Tree):", np.mean(cv_scores_dt))


Mean Cross-validation Score (Decision Tree): 1.0


In [None]:
from sklearn.model_selection import GridSearchCV
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

# Define the parameter grid for Decision Tree hyperparameter tuning
param_grid_dt = {
    'criterion': ['gini', 'entropy'],   # Split criterion
    'max_depth': [None, 10, 20, 30],     # Maximum depth of the tree
    'min_samples_split': [2, 5, 10],     # Minimum number of samples required to split a node
    'min_samples_leaf': [1, 2, 4]        # Minimum number of samples required at each leaf node
}

# Perform grid search with cross-validation for Decision Tree
grid_search_dt = GridSearchCV(DecisionTreeClassifier(), param_grid_dt, cv=None, scoring='accuracy')
grid_search_dt.fit(X_scaled, y)

# Get the best Decision Tree model
best_dt_model = grid_search_dt.best_estimator_

# Predict on the test set
y_pred_dt = best_dt_model.predict(X_test)

# Calculate accuracy
accuracy_dt = accuracy_score(y_test, y_pred_dt)

# Print the best parameters found for Decision Tree
print("Best Parameters (Decision Tree):", grid_search_dt.best_params_)

# Get the mean cross-validation score for the best Decision Tree model
print("Mean Cross-validation Score (Decision Tree):", grid_search_dt.best_score_)


Best Parameters (Decision Tree): {'criterion': 'gini', 'max_depth': None, 'min_samples_leaf': 1, 'min_samples_split': 2}
Mean Cross-validation Score (Decision Tree): 1.0




In [None]:
from sklearn.ensemble import RandomForestClassifier

# Create Random Forest classifier
rf = RandomForestClassifier()

# Fit the model
rf.fit(X_train, y_train)

# Predict on the test set
y_pred_rf = rf.predict(X_test)

# Calculate accuracy
accuracy_rf = accuracy_score(y_test, y_pred_rf)

# Print accuracy
print("Random Forest Accuracy:", accuracy_rf)

# Generate classification report
report_rf = classification_report(y_test, y_pred_rf)
print("Random Forest Classification Report:")
print(report_rf)


Random Forest Accuracy: 1.0
Random Forest Classification Report:
              precision    recall  f1-score   support

        High       1.00      1.00      1.00      2559
      Medium       1.00      1.00      1.00      1868
         low       1.00      1.00      1.00       720

    accuracy                           1.00      5147
   macro avg       1.00      1.00      1.00      5147
weighted avg       1.00      1.00      1.00      5147



In [None]:
from sklearn.ensemble import RandomForestClassifier

# Define Random Forest classifier
rf = RandomForestClassifier()

# Perform cross-validation
cv_scores_rf = cross_val_score(rf, X_scaled, y, cv=kf)

# Print mean cross-validation score
print("Mean Cross-validation Score (Random Forest):", np.mean(cv_scores_rf))


Mean Cross-validation Score (Random Forest): 0.9996891091003397


In [None]:
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# Define a smaller parameter grid for Random Forest hyperparameter tuning
param_grid_rf = {
    'n_estimators': [50, 100],           # Number of trees in the forest
    'max_depth': [None, 10],             # Maximum depth of the tree
    'min_samples_split': [2, 5],         # Minimum number of samples required to split a node
    'min_samples_leaf': [1, 2]           # Minimum number of samples required at each leaf node
}

# Perform grid search with cross-validation for Random Forest
grid_search_rf = GridSearchCV(RandomForestClassifier(), param_grid_rf, cv=None, scoring='accuracy')
grid_search_rf.fit(X_scaled, y)

# Get the best Random Forest model
best_rf_model = grid_search_rf.best_estimator_

# Predict on the test set
y_pred_rf = best_rf_model.predict(X_test)

# Calculate accuracy
accuracy_rf = accuracy_score(y_test, y_pred_rf)

# Print the best parameters found for Random Forest
print("Best Parameters for Random Forest:", grid_search_rf.best_params_)

# Get the mean cross-validation score for the best Random Forest model
print("Mean Cross-validation Score for Random Forest:", grid_search_rf.best_score_)



Best Parameters for Random Forest: {'max_depth': None, 'min_samples_leaf': 2, 'min_samples_split': 5, 'n_estimators': 100}
Mean Cross-validation Score for Random Forest: 0.9983676944093174




In [None]:
from sklearn.ensemble import GradientBoostingClassifier

# Create Gradient Boosting classifier
gb = GradientBoostingClassifier()

# Fit the model
gb.fit(X_train, y_train)

# Predict on the test set
y_pred_gb = gb.predict(X_test)

# Calculate accuracy
accuracy_gb = accuracy_score(y_test, y_pred_gb)

# Print accuracy
print("Gradient Boosting Accuracy:", accuracy_gb)

# Generate classification report
report_gb = classification_report(y_test, y_pred_gb)
print("Gradient Boosting Classification Report:")
print(report_gb)


Gradient Boosting Accuracy: 1.0
Gradient Boosting Classification Report:
              precision    recall  f1-score   support

        High       1.00      1.00      1.00      2559
      Medium       1.00      1.00      1.00      1868
         low       1.00      1.00      1.00       720

    accuracy                           1.00      5147
   macro avg       1.00      1.00      1.00      5147
weighted avg       1.00      1.00      1.00      5147



In [None]:
from sklearn.ensemble import GradientBoostingClassifier

# Define Gradient Boosting classifier
gb = GradientBoostingClassifier()

# Perform cross-validation
cv_scores_gb = cross_val_score(gb, X_scaled, y, cv=kf)

# Print mean cross-validation score
print("Mean Cross-validation Score (Gradient Boosting):", np.mean(cv_scores_gb))


Mean Cross-validation Score (Gradient Boosting): 1.0


In [None]:
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import accuracy_score

# Define a smaller parameter grid for Gradient Boosting hyperparameter tuning
param_grid_gb = {
    'n_estimators': [50, 100],          # Number of boosting stages
    'learning_rate': [0.01, 0.1],       # Learning rate shrinks the contribution of each tree
    'max_depth': [3, 5],                # Maximum depth of the individual trees
    'min_samples_split': [2, 5],        # Minimum number of samples required to split a node
    'min_samples_leaf': [1, 2]          # Minimum number of samples required at each leaf node
}

# Perform grid search with cross-validation for Gradient Boosting
grid_search_gb = GridSearchCV(GradientBoostingClassifier(), param_grid_gb, cv=None, scoring='accuracy')
grid_search_gb.fit(X_scaled, y)

# Get the best Gradient Boosting model
best_gb_model = grid_search_gb.best_estimator_

# Predict on the test set
y_pred_gb = best_gb_model.predict(X_test)

# Calculate accuracy
accuracy_gb = accuracy_score(y_test, y_pred_gb)

# Print the best parameters found for Gradient Boosting
print("Best Parameters for Gradient Boosting:", grid_search_gb.best_params_)

# Get the mean cross-validation score for the best Gradient Boosting model
print("Mean Cross-validation Score for Gradient Boosting:", grid_search_gb.best_score_)


Best Parameters for Gradient Boosting: {'learning_rate': 0.01, 'max_depth': 3, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 50}
Mean Cross-validation Score for Gradient Boosting: 1.0




In [None]:
!pip install streamlit joblib

In [None]:
import joblib

# Assuming 'random_forest' is your best trained Random Forest Regressor model
model = rf  # Replace with your actual model

# Specify the file path where you want to save the model
model_filename = 'rf.pkl'

# Save the model to a .pkl file
joblib.dump(model, model_filename)

['rf.pkl']

In [None]:
dash.head()

Unnamed: 0,CSDUID,CSDNAME,Latitude,Longitude,adId,Title,Type,Price,Location,Bedrooms,Bathrooms,Hydro,Heat,Water,Size,Agreement Type,URL,Date Posted
0,3501005,South Glengarry,45.256161,-74.595385,1618448386,2 Bedroom Apartment For Rent In Green Valley,Apartment,"$1,425","Green Valley, ON K0C 1L0",Bedrooms: 2,Bathrooms: 1,N,Y,Y,950,1 Year,https://www.kijiji.ca/v-apartments-condos/corn...,5/25/2022 4:30
1,3501005,South Glengarry,45.192003,-74.374604,1617525343,Private 2 BDR in-law-suite upstairs. 1Aug. $1...,Apartment,"$1,100","21882 Old Hwy 2, Bainsville, ON K0C 1E0, Canada",Bedrooms: 2,Bathrooms: 1,N,N,Y,780,1 Year,https://www.kijiji.ca/v-apartments-condos/corn...,5/18/2022 12:34
2,3501005,South Glengarry,45.146672,-74.579565,1609020142,Beautiful Country House near Williamstown - $2...,House,"$2,700","Williamstown, ON K0C 2J0",Bedrooms: 3,Bathrooms: 2.5,N,Y,Y,2200,1 Year,https://www.kijiji.ca/v-apartments-condos/corn...,5/21/2022 21:51
3,3501005,South Glengarry,45.143726,-74.502708,1617264438,"6686 Lawrence st, South Glengarry K6h 5r5",House,"$1,200","6686 Lawrence St, Cornwall, South Glengarry, ON",Bedrooms: 2,Bathrooms: 1.5,N,N,Y,900,1 Year,https://www.kijiji.ca/v-apartments-condos/corn...,5/16/2022 15:41
4,3501005,South Glengarry,45.068973,-74.542873,1617233473,Water Front House for Rent Long Term,House,"$4,000","19414 Malibu Ln, Summerstown, ON K0C 2E0, Canada",Bedrooms: 2 + Den,Bathrooms: 3,N,Y,Y,1400,1 Year,https://www.kijiji.ca/v-apartments-condos/corn...,5/16/2022 11:33


In [144]:
import streamlit as st
import streamlit as st
import pandas as pd
import plotly.express as px
import joblib
import folium


# Load the dataset with a specified encoding
data = pd.read_csv('kijiji.csv', encoding='latin1')

# Page 1: Dashboard
def dashboard():
    st.image('Logo.PNG', use_column_width=True)
    st.subheader("💡 Abstract:")
    inspiration = '''
The task is to collect accurate and abundant data for small Canadian communities and derive useful insights to support their local initiatives.
    '''
    st.write(inspiration)
    st.subheader("👨🏻‍💻 What our Project Does?")
    what_it_does = '''
Throughout this project, we did collecte, cleaned and find usefull insight from the dataset, which is available from kijiji for the smaller community in canada. Followed by Implimenting and deploing Machine learning model to perdict the price of different rental property.
'''
    st.write(what_it_does)

# Page 2: Exploratory Data Analysis (EDA)
def exploratory_data_analysis():
    st.title("Exploratory Data Analysis")

    # Price Distribution
    fig = px.histogram(data, x='Price', nbins=20, title='Distribution of Rental Prices')
    st.plotly_chart(fig)

    # Boxplot for Price by Property Type
    fig = px.box(data, x='Type', y='Price', title='Price Distribution by Property Type')
    st.plotly_chart(fig)

# Page 3: Machine Learning Modeling
def machine_learning_modeling():
    st.title("Kijiji Rental Price Prediction")
    st.write("Enter the details of the property to predict its rental price:")

    # Input fields for user to enter data
    property_type = st.selectbox("Type of Property", ['Apartment', 'House', 'Condo', 'Townhouse'])
    bedrooms = st.slider("Number of Bedrooms", 1, 5, 2)
    bathrooms = st.slider("Number of Bathrooms", 1, 3, 1)
    size = st.slider("Size (sqft)", 300, 5000, 1000)
    unique_locations = data['CSDNAME'].unique()
    location = st.selectbox("Location", unique_locations)

    if st.button("Predict"):
        # Load the trained model including preprocessing
        model = joblib.load('rf.pkl')

        # Assuming the model_with_preprocessing is a pipeline that ends with your estimator
        # Prepare input data as a DataFrame to match the training data structure
        input_df = pd.DataFrame({
            'Type': [property_type],
            'Bedrooms': [bedrooms],
            'Bathrooms': [bathrooms],
            'Size': [size],
            'CSDNAME': [location]
        })

        # Make prediction
        prediction = model.predict(input_df)

        # Display the prediction
        st.success(f"Predicted Rental Price: ${prediction[0]:,.2f}")

# Page 4: Community Mapping
def community_mapping():
    st.title("Small Communities Map: Population <10000")
    geodata = pd.read_csv("smallcomunity_EDA_65.csv")

    # Optional: Set your Mapbox token (if you want to use Mapbox styles)
    # px.set_mapbox_access_token('YOUR_MAPBOX_TOKEN_HERE')

    # Create the map using Plotly Express
    fig = px.scatter_mapbox(geodata,
                            lat='Latitude',
                            lon='Longitude',
                            color='Population',  # Color points by population, or choose another column
                            size='Price',  # Size points by price, or choose another column
                            color_continuous_scale=px.colors.cyclical.IceFire,
                            size_max=15,
                            zoom=10,
                            hover_name='Type',  # Display property type when hovering over points
                            hover_data={'Price': True, 'Population': True, 'Latitude': False, 'Longitude': False},
                            title='Small Communities Map')

    fig.update_layout(mapbox_style="open-street-map")  # Use OpenStreetMap style
    st.plotly_chart(fig)


# Main App Logic
def main():
    st.sidebar.title("Kijiji Community App")
    app_page = st.sidebar.radio("Select a Page", ["Dashboard", "EDA", "ML Modeling", "Community Mapping"])

    if app_page == "Dashboard":
        dashboard()
    elif app_page == "EDA":
        exploratory_data_analysis()
    elif app_page == "ML Modeling":
        machine_learning_modeling()
    elif app_page == "Community Mapping":
        community_mapping()

if __name__ == "__main__":
    main()

In [137]:
import streamlit as st
import pandas as pd
import plotly.express as px

# Load the dataset with a specified encoding
data = pd.read_csv('kijiji_cleaned.csv', encoding='latin1')

# Page 1: Dashboard
def dashboard():
  st.image('Logo.PNG', use_column_width=True)
  st.subheader("💡 Abstract:")
  inspiration = '''
        This project delves into analyzing the Ontario rental market using machine learning techniques. We cleaned and explored the dataset to uncover trends and factors influencing rental prices. By developing predictive models, we aimed to forecast rental prices and understand market dynamics. Additionally, we attempted to enhance our analysis by integrating population data and focusing on small communities to observe any changes in rental housing. The deployment of these models allows users to access real-time rental price predictions and market insights, facilitating decision-making processes related to renting properties in Ontario.
    '''
  st.write(inspiration)
  st.subheader("👨🏻‍💻 What our Project Does?:Exploratory Data Analysis (EDA), and Visualization, ML and deployment.")
  what_it_does = '''
    This project is all about understanding the rental market in Ontario, Canada, using computers and math to help us learn. First, we looked at a lot of rental information to find out what affects rental prices. Then, we used what we learned to make predictions about how much it might cost to rent a place in different areas. We also wanted to see if there were any changes in rental housing in small communities when more people lived there. Finally, we made a way for people to use our predictions and insights to help them decide where to rent in Ontario..
    '''
  st.write(what_it_does)

# Page 2: Exploratory Data Analysis (EDA)
def exploratory_data_analysis():
  st.title("Exploratory Data Analysis")

    # Price Distribution
  fig = px.histogram(data, x='Price', nbins=20, title='Distribution of Rental Prices')
  st.plotly_chart(fig)

    # Boxplot for Price by Property Type
  fig = px.box(data, x='Type', y='Price', title='Price Distribution by Property Type')
  st.plotly_chart(fig)

# Page 3: Machine Learning Modeling
def machine_learning_modeling():
  st.title("Kijiji Rental Price Prediction")
  st.write("Enter the details of the property to predict its rental price:")

    # Input fields for user to enter data
  property_type = st.selectbox("Type of Property", ['Apartment', 'House', 'Condo', 'Townhouse'])
  bedrooms = st.slider("Number of Bedrooms", 1, 5, 2)
  bathrooms = st.slider("Number of Bathrooms", 1, 3, 1)
  size = st.slider("Size (sqft)", 300, 5000, 1000)
  unique_locations = data['CSDNAME'].unique()
  location = st.selectbox("Location", unique_locations)

  if st.button("Predict"):
        # Load the trained model including preprocessing
      model = joblib.load('gradient_boost_regressor_model.pkl')

        # Assuming the model_with_preprocessing is a pipeline that ends with your estimator
        # Prepare input data as a DataFrame to match the training data structure
      input_df = pd.DataFrame({
            'Type': [property_type],
            'Bedrooms': [bedrooms],
            'Bathrooms': [bathrooms],
            'Size': [size],
            'CSDNAME': [location]
        })

        # Make prediction
      prediction = model.predict(input_df)

        # Display the prediction
      st.success(f"Predicted Rental Price: ${prediction[0]:,.2f}")

# Page 4: Community Mapping
def community_mapping():
    st.title("Communities Map")
    geodata = pd.read_csv(".csv")

    # Optional: Set your Mapbox token (if you want to use Mapbox styles)
    # px.set_mapbox_access_token('YOUR_MAPBOX_TOKEN_HERE')

    # Create the map using Plotly Express
    fig = px.scatter_mapbox(geodata,
                            lat='Latitude',
                            lon='Longitude',
                            color='Population',  # Color points by population, or choose another column
                            size='Price',  # Size points by price, or choose another column
                            color_continuous_scale=px.colors.cyclical.IceFire,
                            size_max=15,
                            zoom=10,
                            hover_name='Type',  # Display property type when hovering over points
                            hover_data={'Price': True, 'Population': True, 'Latitude': False, 'Longitude': False},
                            title='Small Communities Map')

    fig.update_layout(mapbox_style="open-street-map")  # Use OpenStreetMap style
    st.plotly_chart(fig)


# Main App Logic
def main():
    st.sidebar.title("Kijiji Community App")
    app_page = st.sidebar.radio("Select a Page", ["Dashboard", "EDA", "ML Modeling", "Community Mapping"])

    if app_page == "Dashboard":
        dashboard()
    elif app_page == "EDA":
        exploratory_data_analysis()
    elif app_page == "ML Modeling":
        machine_learning_modeling()
    elif app_page == "Community Mapping":
        community_mapping()

if __name__ == "__main__":
    main()

In [145]:
!npm install localtunnel

[K[?25h[37;40mnpm[0m [0m[30;43mWARN[0m [0m[35msaveError[0m ENOENT: no such file or directory, open '/content/package.json'
[0m[37;40mnpm[0m [0m[30;43mWARN[0m [0m[35menoent[0m ENOENT: no such file or directory, open '/content/package.json'
[0m[37;40mnpm[0m [0m[30;43mWARN[0m[35m[0m content No description
[0m[37;40mnpm[0m [0m[30;43mWARN[0m[35m[0m content No repository field.
[0m[37;40mnpm[0m [0m[30;43mWARN[0m[35m[0m content No README data
[0m[37;40mnpm[0m [0m[30;43mWARN[0m[35m[0m content No license field.
[0m
[K[?25h+ localtunnel@2.0.2
updated 1 package and audited 36 packages in 0.502s

3 packages are looking for funding
  run `npm fund` for details

found 2 [93mmoderate[0m severity vulnerabilities
  run `npm audit fix` to fix them, or `npm audit` for details


In [146]:
!streamlit run /content/app.py &>/content/logs.txt & curl ipv4.icanhazip.com

34.48.61.70


In [None]:
!npx localtunnel --port 8501

[K[?25hnpx: installed 22 in 1.898s
your url is: https://funny-results-sniff.loca.lt
