In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file in it
for dirname, _, filenames in os.walk('/kaggle/input'):
    full_paths = [os.path.join(dirname, filename) for filename in filenames]
    for full_path in full_paths:
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

This business analysis looks at data on the potential (up-and-coming) restaurants of the United States and discusses their potential.

Let's start by importing the libraries we need.

In [None]:
import matplotlib.pyplot as plt
import matplotlib as mpl
import seaborn as sns

In [None]:
# Read the Future 50 restaurant ranking CSV into a DataFrame
future_resto = pd.read_csv(
    "../input/restaurant-business-rankings-2020/Future50.csv"
)

Loading the CSV above gives us the following table.

In [None]:
# Preview the first five rows of the table
future_resto.head(n=5)

- Restaurant: Name of the restaurant
- Location: Origin of the restaurant
- Sales: Systemwide sales (millions USD)
- YOY_Sales: Year-on-year sales increase in %
- Units: Number of premises
- YOY_Units: Year-on-year premises increase in %
- Unit_Volume: Average unit volume (thousands USD)
- Franchising: Whether the restaurant is franchised (Yes/No)

In [None]:
# Print a summary of the raw table (columns, non-null counts and data types)
# to check for missing values and columns that still need converting
future_resto.info()

From the info above we can see that every column has 50 rows and no null values, so we don't need to do any data cleaning for missing values.

However, from the table we can see that some numeric values carry a '%' sign and that the location values contain a ','. We need to format those numbers into real integers, separate the location column into city and state columns, and add more information to each column title about the values it holds.

# Data Preparation

## Location

Start by separating the location column into 'City' and 'State'. This lets us group the restaurants by state and by city, so we can see the condition of the restaurants in each city or state.

In [None]:
# Split the 'Location' column into two columns: 'City' and 'State'.
# n=1 splits on the first comma only, so the result has at most two parts
# even if a location value happens to contain a second comma (an unbounded
# split would then produce three columns and the assignment would fail).
location_parts = future_resto['Location'].str.split(',', n=1, expand=True)

# Remove leading/trailing spaces (e.g. the space that follows the comma)
future_resto['City'] = location_parts[0].str.strip()
future_resto['State'] = location_parts[1].str.strip()

# Drop the original 'Location' column now that it has been split.
# NOTE: this makes the cell single-run; reload the CSV before re-running it.
future_resto = future_resto.drop(columns=['Location'])



## YOY Value

Next, convert each year-on-year value into an integer, so that the year-on-year values are easier to process.

In [None]:
def _percent_to_int(text):
    """Strip the '%' sign from a percentage string and return it truncated to an int."""
    return int(float(text.strip('%')))


# Add integer-valued versions of the year-on-year sales and units columns
future_resto['YOY_Sales (%)'] = future_resto['YOY_Sales'].map(_percent_to_int)
future_resto['YOY_Units (%)'] = future_resto['YOY_Units'].map(_percent_to_int)

# The original string columns are no longer needed
future_resto = future_resto.drop(columns=['YOY_Sales', 'YOY_Units'])

## Column info

And lastly, add some more information to the column titles of the numeric columns.

In [None]:
# Convert the average unit volume to millions of USD (divide by 1000), rounded to 2 decimals
units_volume = future_resto['Unit_Volume'].div(1000).round(2)

# In one chain: append the converted column, drop the original 'Unit_Volume'
# column, and rename 'Sales' so that its unit is part of the column title
future_resto = (
    future_resto
    .assign(**{'Unit Volume (Million USD)': units_volume})
    .drop(columns=['Unit_Volume'])
    .rename(columns={'Sales': 'Sales (Million USD)'})
)

In [None]:
# Check the result of the data preparation: the summary should now list the
# new 'City', 'State', 'YOY_Sales (%)', 'YOY_Units (%)' and
# 'Unit Volume (Million USD)' columns and the renamed 'Sales (Million USD)'
future_resto.info()

# Column Analysis

## 1. Franchise

In [None]:
# Count the restaurants per 'Franchising' value once, then reuse the result:
# `count` holds the counts and `franchise` the matching labels for the pie chart
count = future_resto['Franchising'].value_counts()
franchise = count.index

In [None]:
# Create a 6x4 figure with a single set of axes
fig, ax = plt.subplots(figsize=(6, 4))

# Draw the pie chart: the second slice is pulled out slightly, percentages are
# shown with one decimal, and the first slice starts at 90 degrees
ax.pie(count, labels=franchise, explode=(0, 0.1),
       autopct='%.1f%%', shadow=True, startangle=90)

# Title, and equal axis scaling so the pie is drawn as a circle
ax.set_title('Franchised Restaurant Percentage')
ax.axis('equal')

# Render the chart
plt.show()

From the chart above we can see that 58% (29 samples) of the restaurants are franchised, while 42% (21 samples) are not.

## 2. Unit

In [None]:
# Distribution plot of the number of units per restaurant
units_plot = sns.displot(future_resto['Units'], color='green')
units_plot.set(title='Number of Units of Every Samples')

The chart above shows the distribution of the number of units each restaurant has. You can see that more than 50% of the samples have fewer than 50 units, while only 11 samples have more than 50 units.

In [None]:
# Split the restaurants into franchised ('Yes') and non-franchised ('No') subsets
is_franchise = future_resto['Franchising'] == 'Yes'
is_not_franchise = future_resto['Franchising'] == 'No'
franchise_restaurants = future_resto[is_franchise]
no_franchise_restaurants = future_resto[is_not_franchise]

# Columns shown in both rankings
unit_columns = ['Restaurant', 'Units', 'Franchising']

# Top 10 franchise restaurants with the largest number of units
potential_units_yes = (
    franchise_restaurants
    .nlargest(n=10, columns='Units')
    .sort_values('Units', ascending=False)
)[unit_columns]

# Top 10 non-franchise restaurants with the largest number of units
potential_units_no = (
    no_franchise_restaurants
    .nlargest(n=10, columns='Units')
    .sort_values('Units', ascending=False)
)[unit_columns]

# Print both rankings, separated by a divider
print("Top 10 Franchise Restaurant with largest unit number:")
print(potential_units_yes)
print("", " ", "--------------------------", "", sep="\n")
print("Top 10 Non-Franchise Restaurant with largest unit number:")
print(potential_units_no)

From the tables above we can see that even the non-franchise restaurant with the highest number of units would not make it into the top 10 of franchise restaurants with the most units.

In [None]:
# The 10 franchise restaurants with the FEWEST units, listed largest first.
# Select from the full franchise subset: calling nsmallest(10) on the 10-row
# top-10 table built earlier would simply return those same 10 largest
# restaurants, and would make this cell depend on its own previous output.
potential_units_yes = (
    franchise_restaurants
    .nsmallest(n=10, columns='Units')
    .sort_values('Units', ascending=False)
)
potential_units_yes = potential_units_yes[['Restaurant', 'Units', 'Franchising']]

# Display the results
print("Top 10 Franchise Restaurant with Smallest Unit Number")
print(potential_units_yes)

Meanwhile, if you make your restaurant a franchise, even the franchise restaurants with the smallest number of units are still able to reach the level of the top 10 non-franchise restaurants with the most units.

## 3. Location

In [None]:
# Count how many rows each restaurant name has in the data
future_resto.loc[:, 'Restaurant'].value_counts()

Since every restaurant appears only once in the data, the location is only the location of the base restaurant (the first unit) of each restaurant. We don't have the location of each individual unit, so we can't tell whether the unit locations help (or don't help) the restaurant's sales.

In [None]:
# Declare the values for the chart: number of restaurants per state.
# `state` (the labels) must be spelled exactly like this, because the bar
# chart cell reads it by that name; `count` holds the matching counts.
count = future_resto['State'].value_counts()
state = count.index

In [None]:
# Bar chart of the number of restaurants per state on a 10x6 figure
fig, ax = plt.subplots(figsize=(10, 6))
ax.bar(state, count, color='skyblue')

# Axis labels and title
ax.set_xlabel('State')
ax.set_ylabel('Number of Restaurants')
ax.set_title('Number of Restaurants by State')

# Tilt the state names so they stay readable
plt.xticks(rotation=45, ha='right')

# Fit everything inside the figure, then render it
plt.tight_layout()
plt.show()

From the bar chart above we can see that only 2 states have more than 5 potential restaurants: California and New York. Meanwhile, many states (more than 50% of them) have only 1 potential restaurant. Let's explore the franchise type.

In [None]:
# Keep only the restaurants whose state is California or New York
in_calif_or_ny = future_resto['State'].isin(['Calif.', 'N.Y.'])
california_ny_restaurants = future_resto[in_calif_or_ny]

# Franchise status of those restaurants, sorted by 'Franchising' in descending order
franchise_status = (
    california_ny_restaurants[['Restaurant', 'State', 'Franchising']]
    .sort_values(by='Franchising', ascending=False)
)

# Print the table under its heading
print("Franchise Status of Restaurants in California & New York")
print(franchise_status)


From the table above we can see that 9 samples are franchises and the other 9 samples are not. Interestingly, 6 of the franchise samples are located in California, while 6 of the non-franchise samples are located in New York. Let's look at the states with only 1 potential restaurant.

In [None]:
# Filter for restaurants in states that have exactly one potential restaurant.
# The states are derived from the data rather than hard-coded, so the selection
# always matches the heading printed below and cannot miss or mislist a state.
restaurants_per_state = future_resto['State'].value_counts()
single_restaurant_states = restaurants_per_state[restaurants_per_state == 1].index
one_restaurants = future_resto[future_resto['State'].isin(single_restaurant_states)]

# Show the franchise status of these restaurants and sort by 'Franchising'
franchise_status = one_restaurants[['Restaurant', 'State', 'Franchising']].sort_values(by='Franchising', ascending=False)

# Display the result
print("Franchise Status of the Restaurant at the State Where There is Only 1 Potential Restaurant")
print(franchise_status)


Here too, 50% of the samples are franchise restaurants and 50% are non-franchise restaurants. So far, we can conclude that the restaurant location has no relation to the franchise status. But to be sure, we would need to look at the location of each state in more detail.

## 4. Sales

In [None]:
# Top 10 franchise restaurants by sales, highest first
sales_column = 'Sales (Million USD)'
potential_sales_yes = (
    franchise_restaurants
    .nlargest(n=10, columns=sales_column)
    .sort_values(sales_column, ascending=False)
)

# Keep only the columns relevant to this ranking
result = potential_sales_yes.loc[:, ['Restaurant', sales_column, 'Franchising']]

# Print the ranking under its heading
print("Top 10 Franchise Restaurant with Highest Sales Value:")
print(result)

In [None]:
# Top 10 non-franchise restaurants by sales, highest first
sales_column = 'Sales (Million USD)'
potential_sales_no = (
    no_franchise_restaurants
    .nlargest(n=10, columns=sales_column)
    .sort_values(sales_column, ascending=False)
)

# Keep only the columns relevant to this ranking
result = potential_sales_no.loc[:, ['Restaurant', sales_column, 'Franchising']]

# Print the ranking under its heading
print("Top 10 Non-Franchise Restaurant with Highest Sales Value:")
print(result)

From both tables, we can see that franchising does not have much effect on the sales numbers. Even so, only three non-franchise restaurants made it into the top 10, while there are 5 non-franchise restaurants in the bottom 10. So we can conclude that the sales of non-franchise restaurants are slightly lower than those of franchise restaurants.

## 5. Unit Volume

In [None]:
# Top 10 franchise restaurants by average unit volume, highest first
volume_column = 'Unit Volume (Million USD)'
potential_volume_yes = (
    franchise_restaurants
    .nlargest(n=10, columns=volume_column)
    .sort_values(volume_column, ascending=False)
)

# Keep only the columns relevant to this ranking
result = potential_volume_yes.loc[:, ['Restaurant', volume_column, 'Franchising']]

# Print the ranking under its heading
print("Top 10 Franchise Restaurant with Highest Unit Volume:")
print(result)

In [None]:
# Top 10 non-franchise restaurants by average unit volume, highest first
volume_column = 'Unit Volume (Million USD)'
potential_volume_no = (
    no_franchise_restaurants
    .nlargest(n=10, columns=volume_column)
    .sort_values(volume_column, ascending=False)
)

# Keep only the columns relevant to this ranking
result = potential_volume_no.loc[:, ['Restaurant', volume_column, 'Franchising']]

# Print the ranking under its heading
print("Top 10 Non-Franchise Restaurant with Highest Unit Volume:")
print(result)

## 6. Year-on-Year Analysis

In [None]:
# Distribution plot of the year-on-year sales growth
sales_growth_plot = sns.displot(future_resto['YOY_Sales (%)'], color='Red')
sales_growth_plot.set(title='Number of Restaurant based on Year-on-Year Sale Growth')

# Distribution plot of the year-on-year unit growth
units_growth_plot = sns.displot(future_resto['YOY_Units (%)'])
units_growth_plot.set(title='Number of Restaurant based on Year-on-Year Unit Growth')

From the charts we can see that, for most samples, the YOY sales and YOY units percentages are below 50%. Only 4 restaurants have YOY sales above 100%, and only 2 have YOY units above 75%.

In [None]:
# Top 10 franchise restaurants by year-on-year sales growth, highest first
growth_column = 'YOY_Sales (%)'
potential_growth_sales_yes = (
    franchise_restaurants
    .nlargest(n=10, columns=growth_column)
    .sort_values(growth_column, ascending=False)
)

# Keep only the columns relevant to this ranking
shown_columns = ['Restaurant', 'Sales (Million USD)', growth_column, 'Franchising']
result = potential_growth_sales_yes.loc[:, shown_columns]

# Print the heading, a spacer line, then the ranking
print("Top 10 Franchise Restaurant with Highest Year-on-Year Sale Growth")
print(" ")
print(result)

In [None]:
# Top 10 non-franchise restaurants by year-on-year sales growth, highest first
growth_column = 'YOY_Sales (%)'
potential_growth_sales_no = (
    no_franchise_restaurants
    .nlargest(n=10, columns=growth_column)
    .sort_values(growth_column, ascending=False)
)

# Keep only the columns relevant to this ranking
shown_columns = ['Restaurant', 'Sales (Million USD)', growth_column, 'Franchising']
result = potential_growth_sales_no.loc[:, shown_columns]

# Print the heading, a spacer line, then the ranking
print("Top 10 Non-Franchise Restaurant with Highest Year-on-Year Sale Growth")
print(" ")
print(result)

In [None]:
# Top 10 franchise restaurants by year-on-year unit growth, highest first
growth_column = 'YOY_Units (%)'
potential_growth_units_yes = (
    franchise_restaurants
    .nlargest(n=10, columns=growth_column)
    .sort_values(growth_column, ascending=False)
)

# Keep only the columns relevant to this ranking
shown_columns = ['Restaurant', 'Units', growth_column, 'Franchising']
result = potential_growth_units_yes.loc[:, shown_columns]

# Print the heading, a spacer line, then the ranking
print("Top 10 Franchise Restaurant with Highest Year-on-Year Unit Growth")
print(" ")
print(result)

In [None]:
# Get the top 10 non-franchise restaurants by year-on-year unit growth
# and sort them in descending order
potential_growth_units_no = no_franchise_restaurants.nlargest(n = 10, columns = 'YOY_Units (%)').sort_values('YOY_Units (%)', ascending = False)

# Display the relevant columns
result = potential_growth_units_no[['Restaurant', 'Units', 'YOY_Units (%)', 'Franchising']]

# Display the result. The heading must say "Non-Franchise": this table is built
# from the non-franchise subset, and the franchise table is printed separately.
print("Top 10 Non-Franchise Restaurant with Highest Year-on-Year Unit Growth")
print(" ")
print(result)

# Restaurant Prediction

## Predicting Restaurant With the Most Units Next Year

In [None]:
# Project next year's unit count by applying this year's year-on-year unit
# growth percentage to the current number of units, rounded to a whole number
current_units = future_resto['Units']
added_units = (future_resto['YOY_Units (%)'] / 100) * current_units
units_next_year = (added_units + current_units).round()

# Store the projection and show the 10 restaurants with the most predicted units
future_resto['Predicted Units Next Year'] = units_next_year
future_resto.sort_values('Predicted Units Next Year', ascending=False).head(10)

From the year-on-year unit growth comparison, we can see that within the top 10, franchise restaurants gained a higher percentage. That is because the franchise system makes it easier to gain new units.

## Predicting Restaurant with the Most Sales Next Year

In [None]:
# Project next year's sales by applying this year's year-on-year sales growth
# percentage to the current sales, rounded to 2 decimals
current_sales = future_resto['Sales (Million USD)']
added_sales = (future_resto['YOY_Sales (%)'] / 100) * current_sales
sales_next_year = (added_sales + current_sales).round(2)

# Store the projection and show the 10 restaurants with the highest predicted sales
future_resto['Predicted Sales Next Year (Million USD)'] = sales_next_year
future_resto.sort_values('Predicted Sales Next Year (Million USD)', ascending=False).head(10)

This simple projection results in the table above. We can see that, of the 10 restaurants on the list, 8 are franchise restaurants. That means franchise restaurants have more potential to become big restaurants, if they can maintain their performance next year.

## Predicting Restaurant With the Highest Unit Volume Next Year

In [None]:
# Project next year's average unit volume: predicted sales divided by the
# predicted number of units, rounded to 2 decimals
predicted_sales = future_resto['Predicted Sales Next Year (Million USD)']
predicted_units = future_resto['Predicted Units Next Year']
volume_rate_next_year = (predicted_sales / predicted_units).round(2)

# Store the projection and show the 10 restaurants with the highest predicted unit volume
future_resto['Predicted Unit Volume Next Year'] = volume_rate_next_year
future_resto.sort_values('Predicted Unit Volume Next Year', ascending=False).head(10)

But the good news is that non-franchise restaurants still have a bigger unit volume. So, even though their sales are still lower, they can maintain the quality of their units and then switch to a franchise model to increase their sales.