# Hackathon!
## Restaurant Turnover Prediction
The predicted Annual Turnover for each restaurant in the Test dataset will be compared with the actual Annual Turnover to calculate the RMSE value of the entire prediction. The lower the RMSE value, the better the model will be.

### Rules
* The measure of accuracy will be RMSE (Root mean square error).
* Submission File Format: You are to submit a  '.csv' file with exactly 500 entries plus a header row. The file should have exactly two columns.

---
---



| Author Project Submission| Date |
| --- | --- |
| Rob Barker | July 24, 2024 | 

In [2]:
# Libraries to help with reading and manipulating data.
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression

# Importing Simple Imputer to treat the null values.
from sklearn.impute import SimpleImputer

# Suppress warnings (FutureWarning).
import warnings
warnings.filterwarnings("ignore", category= FutureWarning)

In [5]:
# Read the training and testing datasets.
hackathon_train_org_df = pd.read_csv("/Users/barkz/Desktop/github-barkz/Restaurant-Turnover-Prediction/Train_dataset_(1).csv")
hackathon_test_org_df = pd.read_csv("/Users/barkz/Desktop/github-barkz/Restaurant-Turnover-Prediction/Test_dataset_(1).csv")

# Create a copy of the original DataFrame to avoid modifying the original data.
hackathon_train_df = hackathon_train_org_df.copy()
hackathon_test_df = hackathon_test_org_df.copy()

# Print the first 5 rows of hackathon_train_df as formatted data.
print("First 5 rows of hackathon_train_df:")
hackathon_train_df.head(5)

First 5 rows of hackathon_train_df:


Unnamed: 0,Registration Number,Annual Turnover,Cuisine,City,Restaurant Location,Opening Day of Restaurant,Facebook Popularity Quotient,Endorsed By,Instagram Popularity Quotient,Fire Audit,...,Overall Restaurant Rating,Live Music Rating,Comedy Gigs Rating,Value Deals Rating,Live Sports Rating,Ambience,Lively,Service,Comfortablility,Privacy
0,60001,42000000,"indian,irish",Bangalore,Near Business Hub,14-02-2009,84.3,Not Specific,95.8,1,...,10.0,4.0,,,,8.0,8,6,6,6
1,60002,50000000,"indian,irish",Indore,Near Party Hub,29-09-2008,85.4,Tier A Celebrity,85.0,1,...,9.0,,4.0,,,5.0,7,7,3,8
2,60003,32500000,"tibetan,italian",Chennai,Near Business Hub,30-07-2011,85.0,Tier A Celebrity,68.2,1,...,8.0,3.0,,,,7.0,10,5,2,8
3,60004,110000000,"turkish,nigerian",Gurgaon,Near Party Hub,30-11-2008,85.6,Tier A Celebrity,83.6,0,...,9.0,6.0,,,,7.0,7,4,3,5
4,60005,20000000,"irish,belgian",Manesar,Near Party Hub,22-02-2010,,Tier A Celebrity,76.8,1,...,6.0,,2.0,,,,6,2,4,6


In [6]:
# Print the first 5 rows of hackathon_test_df as formatted data.
print("First 5 rows of hackathon_test_df:")
hackathon_test_df.head(5)

First 5 rows of hackathon_test_df:


Unnamed: 0,Registration Number,Cuisine,City,Restaurant Location,Opening Day of Restaurant,Facebook Popularity Quotient,Endorsed By,Instagram Popularity Quotient,Fire Audit,Liquor License Obtained,...,Overall Restaurant Rating,Live Music Rating,Comedy Gigs Rating,Value Deals Rating,Live Sports Rating,Ambience,Lively,Service,Comfortablility,Privacy
0,20001,"tibetan,italian",Bangalore,Near Business Hub,13-07-2010,78.0,Tier A Celebrity,69.0,1,1,...,6.0,,2.0,,,5,2,2,6,0
1,20002,"tibetan,italian",Hyderabad,Near Party Hub,05-09-2011,89.17,Not Specific,96.0,1,1,...,9.0,3.0,,,6.0,7,8,5,1,8
2,20003,"algerian,belgian",Hyderabad,Near Party Hub,12-04-2011,84.0,Not Specific,86.0,1,1,...,6.0,3.0,,,,9,7,5,1,5
3,20004,"tibetan,greek",-1,Near Party Hub,16-01-2005,79.38,Not Specific,74.4,0,1,...,9.0,6.0,,,,6,7,6,4,7
4,20005,"cuban,british",Pune,Near Party Hub,10-11-2008,84.67,Not Specific,86.46,1,1,...,,,,,,4,4,7,7,3


In [7]:
# Checking for the null values in train dataset.
hackathon_train_df.isnull().sum()


Registration Number                 0
Annual Turnover                     0
Cuisine                             0
City                                0
Restaurant Location                 0
Opening Day of Restaurant           0
Facebook Popularity Quotient       99
Endorsed By                         0
Instagram Popularity Quotient      56
Fire Audit                          0
Liquor License Obtained             0
Situated in a Multi Complex         0
Dedicated Parking                   0
Open Sitting Available              0
Resturant Tier                     49
Restaurant Type                     0
Restaurant Theme                    0
Restaurant Zomato Rating            0
Restaurant City Tier                0
Order Wait Time                     0
Staff Responsivness                 0
Value for Money                     0
Hygiene Rating                      0
Food Rating                         0
Overall Restaurant Rating         212
Live Music Rating                 765
Comedy Gigs 

In [8]:
# Check datatypes.
hackathon_train_df.info()

# Get statistics of the numerical columns.
hackathon_train_df.describe()

# Get columns with object types only.
object_columns = hackathon_train_df.select_dtypes(include=['object']).columns
print('\n Object Columns:', object_columns)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3493 entries, 0 to 3492
Data columns (total 34 columns):
 #   Column                         Non-Null Count  Dtype  
---  ------                         --------------  -----  
 0   Registration Number            3493 non-null   int64  
 1   Annual Turnover                3493 non-null   int64  
 2   Cuisine                        3493 non-null   object 
 3   City                           3493 non-null   object 
 4   Restaurant Location            3493 non-null   object 
 5   Opening Day of Restaurant      3493 non-null   object 
 6   Facebook Popularity Quotient   3394 non-null   float64
 7   Endorsed By                    3493 non-null   object 
 8   Instagram Popularity Quotient  3437 non-null   float64
 9   Fire Audit                     3493 non-null   int64  
 10  Liquor License Obtained        3493 non-null   int64  
 11  Situated in a Multi Complex    3493 non-null   int64  
 12  Dedicated Parking              3493 non-null   i

In [9]:
# Checking for the null values in test dataset.
hackathon_test_df.isnull().sum()

Registration Number                0
Cuisine                            0
City                               0
Restaurant Location                0
Opening Day of Restaurant          0
Facebook Popularity Quotient       0
Endorsed By                        0
Instagram Popularity Quotient      0
Fire Audit                         0
Liquor License Obtained            0
Situated in a Multi Complex        0
Dedicated Parking                  0
Open Sitting Available             0
Resturant Tier                     0
Restaurant Type                    0
Restaurant Theme                   0
Restaurant Zomato Rating           0
Restaurant City Tier               0
Order Wait Time                    0
Staff Responsivness                0
Value for Money                    0
Hygiene Rating                     0
Food Rating                        0
Overall Restaurant Rating         34
Live Music Rating                102
Comedy Gigs Rating               370
Value Deals Rating               385
L

In [10]:
# Print column names to verify their exact names.
print("Column names in hackathon_train_df:")
print(hackathon_train_df.columns)

# Strip any leading/trailing spaces from the column names.
hackathon_train_df.columns = hackathon_train_df.columns.str.strip()
hackathon_test_df.columns = hackathon_test_df.columns.str.strip()

# Convert object columns to categorical data types.
object_columns = hackathon_train_df.select_dtypes(include=['object']).columns

for col in object_columns:
    hackathon_train_df[col] = hackathon_train_df[col].astype('category')
    hackathon_test_df[col] = hackathon_test_df[col].astype('category')

# Verify the data types.
print("\nData types in hackathon_train_df:")
print(hackathon_train_df.dtypes)
print("\nData types in hackathon_test_df:")
print(hackathon_test_df.dtypes)

Column names in hackathon_train_df:
Index(['Registration Number', 'Annual Turnover', 'Cuisine', 'City',
       'Restaurant Location', 'Opening Day of Restaurant',
       'Facebook Popularity Quotient', 'Endorsed By',
       'Instagram Popularity Quotient', 'Fire Audit',
       'Liquor License Obtained', 'Situated in a Multi Complex',
       'Dedicated Parking', 'Open Sitting Available', 'Resturant Tier',
       'Restaurant Type', 'Restaurant Theme', 'Restaurant Zomato Rating',
       'Restaurant City Tier', 'Order Wait Time', 'Staff Responsivness',
       'Value for Money', 'Hygiene Rating', 'Food Rating',
       'Overall Restaurant Rating', 'Live Music Rating', 'Comedy Gigs Rating',
       'Value Deals Rating', 'Live Sports Rating', 'Ambience', 'Lively',
       'Service', 'Comfortablility', 'Privacy'],
      dtype='object')

Data types in hackathon_train_df:
Registration Number                 int64
Annual Turnover                     int64
Cuisine                          category
Ci

In [11]:
# Print the first 5 rows of hackathon_train_df as formatted data.
hackathon_train_df.head(5)

Unnamed: 0,Registration Number,Annual Turnover,Cuisine,City,Restaurant Location,Opening Day of Restaurant,Facebook Popularity Quotient,Endorsed By,Instagram Popularity Quotient,Fire Audit,...,Overall Restaurant Rating,Live Music Rating,Comedy Gigs Rating,Value Deals Rating,Live Sports Rating,Ambience,Lively,Service,Comfortablility,Privacy
0,60001,42000000,"indian,irish",Bangalore,Near Business Hub,14-02-2009,84.3,Not Specific,95.8,1,...,10.0,4.0,,,,8.0,8,6,6,6
1,60002,50000000,"indian,irish",Indore,Near Party Hub,29-09-2008,85.4,Tier A Celebrity,85.0,1,...,9.0,,4.0,,,5.0,7,7,3,8
2,60003,32500000,"tibetan,italian",Chennai,Near Business Hub,30-07-2011,85.0,Tier A Celebrity,68.2,1,...,8.0,3.0,,,,7.0,10,5,2,8
3,60004,110000000,"turkish,nigerian",Gurgaon,Near Party Hub,30-11-2008,85.6,Tier A Celebrity,83.6,0,...,9.0,6.0,,,,7.0,7,4,3,5
4,60005,20000000,"irish,belgian",Manesar,Near Party Hub,22-02-2010,,Tier A Celebrity,76.8,1,...,6.0,,2.0,,,,6,2,4,6


In [12]:
# Print the first 5 rows of hackathon_test_df as formatted data.
hackathon_test_df.head(5)

Unnamed: 0,Registration Number,Cuisine,City,Restaurant Location,Opening Day of Restaurant,Facebook Popularity Quotient,Endorsed By,Instagram Popularity Quotient,Fire Audit,Liquor License Obtained,...,Overall Restaurant Rating,Live Music Rating,Comedy Gigs Rating,Value Deals Rating,Live Sports Rating,Ambience,Lively,Service,Comfortablility,Privacy
0,20001,"tibetan,italian",Bangalore,Near Business Hub,13-07-2010,78.0,Tier A Celebrity,69.0,1,1,...,6.0,,2.0,,,5,2,2,6,0
1,20002,"tibetan,italian",Hyderabad,Near Party Hub,05-09-2011,89.17,Not Specific,96.0,1,1,...,9.0,3.0,,,6.0,7,8,5,1,8
2,20003,"algerian,belgian",Hyderabad,Near Party Hub,12-04-2011,84.0,Not Specific,86.0,1,1,...,6.0,3.0,,,,9,7,5,1,5
3,20004,"tibetan,greek",-1,Near Party Hub,16-01-2005,79.38,Not Specific,74.4,0,1,...,9.0,6.0,,,,6,7,6,4,7
4,20005,"cuban,british",Pune,Near Party Hub,10-11-2008,84.67,Not Specific,86.46,1,1,...,,,,,,4,4,7,7,3


In [13]:
# Create an instance of SimpleImputer with strategy='mean'
SI = SimpleImputer(strategy='mean')

# Select numeric columns to apply imputation
numeric_columns = hackathon_train_df.select_dtypes(include=['int64', 'float64']).columns

# Fit the imputer on the numeric columns and transform the data
hackathon_train_df[numeric_columns] = SI.fit_transform(hackathon_train_df[numeric_columns])

# Print the first 5 rows of hackathon_train_df after imputation
print("First 5 rows of hackathon_train_df after imputation:")
hackathon_train_df.head(5)

First 5 rows of hackathon_train_df after imputation:


Unnamed: 0,Registration Number,Annual Turnover,Cuisine,City,Restaurant Location,Opening Day of Restaurant,Facebook Popularity Quotient,Endorsed By,Instagram Popularity Quotient,Fire Audit,...,Overall Restaurant Rating,Live Music Rating,Comedy Gigs Rating,Value Deals Rating,Live Sports Rating,Ambience,Lively,Service,Comfortablility,Privacy
0,60001.0,42000000.0,"indian,irish",Bangalore,Near Business Hub,14-02-2009,84.3,Not Specific,95.8,1.0,...,10.0,4.0,2.932673,3.655216,3.590244,8.0,8.0,6.0,6.0,6.0
1,60002.0,50000000.0,"indian,irish",Indore,Near Party Hub,29-09-2008,85.4,Tier A Celebrity,85.0,1.0,...,9.0,4.01283,4.0,3.655216,3.590244,5.0,7.0,7.0,3.0,8.0
2,60003.0,32500000.0,"tibetan,italian",Chennai,Near Business Hub,30-07-2011,85.0,Tier A Celebrity,68.2,1.0,...,8.0,3.0,2.932673,3.655216,3.590244,7.0,10.0,5.0,2.0,8.0
3,60004.0,110000000.0,"turkish,nigerian",Gurgaon,Near Party Hub,30-11-2008,85.6,Tier A Celebrity,83.6,0.0,...,9.0,6.0,2.932673,3.655216,3.590244,7.0,7.0,4.0,3.0,5.0
4,60005.0,20000000.0,"irish,belgian",Manesar,Near Party Hub,22-02-2010,77.938715,Tier A Celebrity,76.8,1.0,...,6.0,4.01283,2.0,3.655216,3.590244,6.42301,6.0,2.0,4.0,6.0


In [14]:
# Verify the data types.
print("\nData types in hackathon_train_df:")
print(hackathon_train_df.dtypes)


Data types in hackathon_train_df:
Registration Number               float64
Annual Turnover                   float64
Cuisine                          category
City                             category
Restaurant Location              category
Opening Day of Restaurant        category
Facebook Popularity Quotient      float64
Endorsed By                      category
Instagram Popularity Quotient     float64
Fire Audit                        float64
Liquor License Obtained           float64
Situated in a Multi Complex       float64
Dedicated Parking                 float64
Open Sitting Available            float64
Resturant Tier                    float64
Restaurant Type                  category
Restaurant Theme                 category
Restaurant Zomato Rating          float64
Restaurant City Tier              float64
Order Wait Time                   float64
Staff Responsivness               float64
Value for Money                   float64
Hygiene Rating                    float64

In [15]:
# Create an instance of SimpleImputer with strategy='mean'.
SI = SimpleImputer(strategy='mean')

# Fit the imputer on the 'Instagram Popularity Quotient' column.
SI.fit(hackathon_train_df[['Instagram Popularity Quotient']])

# Transform the column to replace missing values with the mean.
hackathon_train_df['Instagram Popularity Quotient'] = SI.transform(hackathon_train_df[['Instagram Popularity Quotient']])

# Print the first 5 rows of hackathon_train_df after imputation.
print("First 5 rows of hackathon_train_df after imputation:")
hackathon_train_df.head(5)

First 5 rows of hackathon_train_df after imputation:


Unnamed: 0,Registration Number,Annual Turnover,Cuisine,City,Restaurant Location,Opening Day of Restaurant,Facebook Popularity Quotient,Endorsed By,Instagram Popularity Quotient,Fire Audit,...,Overall Restaurant Rating,Live Music Rating,Comedy Gigs Rating,Value Deals Rating,Live Sports Rating,Ambience,Lively,Service,Comfortablility,Privacy
0,60001.0,42000000.0,"indian,irish",Bangalore,Near Business Hub,14-02-2009,84.3,Not Specific,95.8,1.0,...,10.0,4.0,2.932673,3.655216,3.590244,8.0,8.0,6.0,6.0,6.0
1,60002.0,50000000.0,"indian,irish",Indore,Near Party Hub,29-09-2008,85.4,Tier A Celebrity,85.0,1.0,...,9.0,4.01283,4.0,3.655216,3.590244,5.0,7.0,7.0,3.0,8.0
2,60003.0,32500000.0,"tibetan,italian",Chennai,Near Business Hub,30-07-2011,85.0,Tier A Celebrity,68.2,1.0,...,8.0,3.0,2.932673,3.655216,3.590244,7.0,10.0,5.0,2.0,8.0
3,60004.0,110000000.0,"turkish,nigerian",Gurgaon,Near Party Hub,30-11-2008,85.6,Tier A Celebrity,83.6,0.0,...,9.0,6.0,2.932673,3.655216,3.590244,7.0,7.0,4.0,3.0,5.0
4,60005.0,20000000.0,"irish,belgian",Manesar,Near Party Hub,22-02-2010,77.938715,Tier A Celebrity,76.8,1.0,...,6.0,4.01283,2.0,3.655216,3.590244,6.42301,6.0,2.0,4.0,6.0


In [16]:
# Verify the data types.
print("\nData types in hackathon_train_df:")
print(hackathon_train_df.dtypes)


Data types in hackathon_train_df:
Registration Number               float64
Annual Turnover                   float64
Cuisine                          category
City                             category
Restaurant Location              category
Opening Day of Restaurant        category
Facebook Popularity Quotient      float64
Endorsed By                      category
Instagram Popularity Quotient     float64
Fire Audit                        float64
Liquor License Obtained           float64
Situated in a Multi Complex       float64
Dedicated Parking                 float64
Open Sitting Available            float64
Resturant Tier                    float64
Restaurant Type                  category
Restaurant Theme                 category
Restaurant Zomato Rating          float64
Restaurant City Tier              float64
Order Wait Time                   float64
Staff Responsivness               float64
Value for Money                   float64
Hygiene Rating                    float64

In [17]:
# Create an instance of SimpleImputer with strategy='mean'.
SI = SimpleImputer(strategy='mean')

# Fit the imputer on the 'Instagram Popularity Quotient' column.
SI.fit(hackathon_train_df[['Instagram Popularity Quotient']])

# Transform the column to replace missing values with the mean.
IPQ = SI.transform(hackathon_train_df[['Instagram Popularity Quotient']])

# Print the transformed column.
print("Transformed 'Instagram Popularity Quotient' column:")
print(IPQ)

Transformed 'Instagram Popularity Quotient' column:
[[95.8]
 [85. ]
 [68.2]
 ...
 [86.8]
 [86. ]
 [67. ]]


In [18]:
# Declaring the Linear Regression function.
LR = LinearRegression()

# Fitting the Linear Regression function.
model = LR.fit(IPQ, hackathon_train_df['Annual Turnover'])

# Print the model to confirm.
print("Linear Regression model:", model)

# Checking the score of the function on the training data.
print('Model score: ' + str(model.score(IPQ, hackathon_train_df['Annual Turnover'])))

Linear Regression model: LinearRegression()
Model score: 0.026482712263984687


# Loading the Test Data and using the Test Data to Predict the target Variable

**Note:** In the test dataset the spelling of column "Endorsed By" was "Endoresed By". Before using the dataset the column name was updated. 

In [19]:
# Testing dataset loaded previously at start of notebook.
#hackathon_test_org_df = pd.read_csv("/Users/robbarker/Desktop/Restaurant-Turnover-Prediction/Test_dataset_(1).csv")

# Create a copy of the original DataFrame to avoid modifying the original data.
hackathon_test_df = hackathon_test_org_df.copy()

# Print the first 5 rows of hackathon_test_df as formatted data.
print("First 5 rows of hackathon_test_df:")
hackathon_test_df.head(5)

First 5 rows of hackathon_test_df:


Unnamed: 0,Registration Number,Cuisine,City,Restaurant Location,Opening Day of Restaurant,Facebook Popularity Quotient,Endorsed By,Instagram Popularity Quotient,Fire Audit,Liquor License Obtained,...,Overall Restaurant Rating,Live Music Rating,Comedy Gigs Rating,Value Deals Rating,Live Sports Rating,Ambience,Lively,Service,Comfortablility,Privacy
0,20001,"tibetan,italian",Bangalore,Near Business Hub,13-07-2010,78.0,Tier A Celebrity,69.0,1,1,...,6.0,,2.0,,,5,2,2,6,0
1,20002,"tibetan,italian",Hyderabad,Near Party Hub,05-09-2011,89.17,Not Specific,96.0,1,1,...,9.0,3.0,,,6.0,7,8,5,1,8
2,20003,"algerian,belgian",Hyderabad,Near Party Hub,12-04-2011,84.0,Not Specific,86.0,1,1,...,6.0,3.0,,,,9,7,5,1,5
3,20004,"tibetan,greek",-1,Near Party Hub,16-01-2005,79.38,Not Specific,74.4,0,1,...,9.0,6.0,,,,6,7,6,4,7
4,20005,"cuban,british",Pune,Near Party Hub,10-11-2008,84.67,Not Specific,86.46,1,1,...,,,,,,4,4,7,7,3


In [20]:
# Checking for the null values in test dataset.
hackathon_test_df.isnull().sum()

Registration Number                0
Cuisine                            0
City                               0
Restaurant Location                0
Opening Day of Restaurant          0
Facebook Popularity Quotient       0
Endorsed By                        0
Instagram Popularity Quotient      0
Fire Audit                         0
Liquor License Obtained            0
Situated in a Multi Complex        0
Dedicated Parking                  0
Open Sitting Available             0
Resturant Tier                     0
Restaurant Type                    0
Restaurant Theme                   0
Restaurant Zomato Rating           0
Restaurant City Tier               0
Order Wait Time                    0
Staff Responsivness                0
Value for Money                    0
Hygiene Rating                     0
Food Rating                        0
Overall Restaurant Rating         34
Live Music Rating                102
Comedy Gigs Rating               370
Value Deals Rating               385
L

In [18]:
# Create an instance of SimpleImputer with strategy='mean'.
SI = SimpleImputer(strategy='mean')

# Fit the imputer on the 'Instagram Popularity Quotient' column of the test set.
SI.fit(hackathon_test_df[['Instagram Popularity Quotient']])

# Transform the column to replace missing values with the mean.
IPQ_test = SI.transform(hackathon_test_df[['Instagram Popularity Quotient']])

# Using the model built on the Training set to predict on the Test Set.
prediction = model.predict(IPQ_test)

# Print the predictions.
print("Predictions on the test set:")
print(prediction)

Predictions on the test set:
[28970960.71768262 37737095.60304943 34490378.9788395  30724187.69475598
 34639727.94355315 25759957.976339   23906082.78391513 27347602.40557766
 31243662.35462957 27448250.62092816 30399516.03233499 33711166.98902912
 35139722.30368149 31081326.52341907 29078102.36628155 27724221.53398601
 33776101.32151331 27867077.06545124 34165707.31641851 28191748.72787223
 28646289.05526163 33516363.99157652 27996945.73041964 27996945.73041964
 31935212.99558628 26308653.08583048 36373474.62088126 35139722.30368149
 32996889.33170293 35789065.62852347 27120332.24188296 31243662.35462957
 36146204.45718656 36535810.45209175 31243662.35462957 22802199.13168376
 32542349.00431354 29490435.37755621 28743690.55398792 24360623.11130452
 36730613.44954436 27802142.73296705 29321606.11309729 27802142.73296705
 35983868.62597607 31568334.01705056 27672274.06799865 33711166.98902912
 29295632.38010361 28711223.38774583 31893005.67947156 33386495.32660812
 26828127.74570407 324

# Saving the output in a data frame and then exporting it to a '.csv' file with the appropriate 'Registration Number'.

In [19]:
solution_df = pd.DataFrame(hackathon_test_df['Registration Number'])
solution_df


Unnamed: 0,Registration Number
0,20001
1,20002
2,20003
3,20004
4,20005
...,...
495,20496
496,20497
497,20498
498,20499


In [20]:
# Adding the predictions to the solution dataframe.
solution_df['Annual Turnover'] = prediction  # type: ignore
solution_df

Unnamed: 0,Registration Number,Annual Turnover
0,20001,2.897096e+07
1,20002,3.773710e+07
2,20003,3.449038e+07
3,20004,3.072419e+07
4,20005,3.463973e+07
...,...,...
495,20496,2.994498e+07
496,20497,2.395478e+07
497,20498,2.975017e+07
498,20499,3.770463e+07


In [21]:
# Setting the directory to export the file as a '.csv' file.
import os
location = '/Users/robbarker/Desktop/Restaurant-Turnover-Prediction'
os.chdir(location)



In [22]:
# Exporting the data frame to a '.csv' file and setting the index = False as we do want the index.

solution_df.to_csv('TeamPi_Hackathon_Project.csv',index=False)

# END