In [1]:
import pandas as pd
import numpy as np
import matplotlib 
import matplotlib.pyplot as plt

In [2]:
# Load the ramen ratings CSV directly from its GitHub raw URL into a DataFrame
# (this cell needs network access to run).
file = pd.read_csv('https://raw.githubusercontent.com/angelaaaateng/ftw_python/main/data/ramen-ratings.csv')

In [3]:
# Preview the first rows of the raw table to see which columns are available.
file.head()

Unnamed: 0,Review #,Brand,Variety,Style,Country,Stars,Top Ten
0,2580,New Touch,T's Restaurant Tantanmen,Cup,Japan,3.75,
1,2579,Just Way,Noodles Spicy Hot Sesame Spicy Hot Sesame Guan...,Pack,Taiwan,1.0,
2,2578,Nissin,Cup Noodles Chicken Vegetable,Cup,USA,2.25,
3,2577,Wei Lih,GGE Ramen Snack Tomato Flavor,Pack,Taiwan,2.75,
4,2576,Ching's Secret,Singapore Curry,Pack,India,3.75,


In [4]:
# Boolean-mask selection: keep only the reviews whose Brand is "Wei Lih".
file.loc[file["Brand"].eq("Wei Lih")]

Unnamed: 0,Review #,Brand,Variety,Style,Country,Stars,Top Ten
3,2577,Wei Lih,GGE Ramen Snack Tomato Flavor,Pack,Taiwan,2.75,
40,2540,Wei Lih,GGE Noodle Snack Wheat Crackers Mexican Spicy,Pack,Taiwan,3.25,
614,1966,Wei Lih,GGE Noodle Snack Wheat Crackers Soy Sauce Ramen,Pack,Taiwan,3.75,
636,1944,Wei Lih,GGE Noodle Snack Wheat Crackers Hot Spicy,Pack,Taiwan,5.0,
853,1727,Wei Lih,Jah Jan Men,Bowl,Taiwan,5.0,
932,1648,Wei Lih,Instant Noodles With Onion Flavour,Pack,Taiwan,4.25,
1118,1462,Wei Lih,Steam Instant Noodles Korean Salt & Rib Soup F...,Bowl,Taiwan,2.25,
1400,1180,Wei Lih,What’s That? Leisure Meatballs Spicy Chicken F...,Pack,Taiwan,3.75,
1449,1131,Wei Lih,What’s That? Leisure Meatballs Chicken Flavor,Pack,Taiwan,3.75,
1545,1035,Wei Lih,Spicy Sichuan Flavor Instant Noodle,Bowl,Taiwan,3.5,


Investigate the amount of data we have

In [5]:
# (number of rows, number of columns) of the raw table.
file.shape

(2580, 7)

Check the data type of each column (feature)

In [6]:
# Data type pandas inferred for each column of the raw table.
file.dtypes

Review #     int64
Brand       object
Variety     object
Style       object
Country     object
Stars       object
Top Ten     object
dtype: object

In [7]:
# Inspect the Python type of a single Brand value (row label 0) to see what
# an "object" column actually stores.
type(file.loc[0, "Brand"])

str

Note that the "Stars" column currently has dtype `object`, which means its values are actually **strings**. \
We need to convert it to **float** before we can compute numeric statistics on it.

In [8]:
# Summary of the still-unconverted Stars column.
file["Stars"].describe()

count     2580
unique      51
top          4
freq       384
Name: Stars, dtype: object

But can we do it directly?

In [9]:
# Minimal demo: a numeric string can be converted with the built-in float().
a = "4.0"
b = float(a)  # "4.0" -> 4.0
b

4.0

In [10]:
# Intentionally left commented out: the Stars column still contains the
# non-numeric string 'Unrated', so a direct conversion of the whole column
# is expected to fail until those rows are removed.
# file.Stars.astype(float)

No, because three products are marked as 'Unrated', and that string cannot be converted to a float

In [11]:
# Show the rows whose Stars value is the literal string 'Unrated'.
file.loc[file["Stars"].eq('Unrated')]

Unnamed: 0,Review #,Brand,Variety,Style,Country,Stars,Top Ten
32,2548,Ottogi,Plain Instant Noodle No Soup Included,Pack,South Korea,Unrated,
122,2458,Samyang Foods,Sari Ramen,Pack,South Korea,Unrated,
993,1587,Mi E-Zee,Plain Noodles,Pack,Malaysia,Unrated,


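Remember to use `.copy()` here because we are creating a new DataFrame that we will modify later. (What if we don't? pandas may emit a `SettingWithCopyWarning` when we later assign to a column of the filtered frame.)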

In [12]:
# Keep only the rated rows; .copy() makes file_rated an independent frame so
# that later column assignments do not act on a slice of `file`.
file_rated = file.loc[file["Stars"].ne('Unrated')].copy()
# For comparison, the same selection without .copy():
# file_rated = file[file["Stars"] != 'Unrated']
file_rated.shape

(2577, 7)

In [13]:
# With the 'Unrated' rows gone, every Stars value can be converted to float.
file_rated = file_rated.assign(Stars=file_rated["Stars"].astype(float))
file_rated.dtypes

Review #      int64
Brand        object
Variety      object
Style        object
Country      object
Stars       float64
Top Ten      object
dtype: object

In [14]:
# Stars is numeric now, so describe() reports mean, std and quartiles
# instead of the unique/top/freq summary shown for the string column.
file_rated["Stars"].describe()

count    2577.000000
mean        3.654676
std         1.015331
min         0.000000
25%         3.250000
50%         3.750000
75%         4.250000
max         5.000000
Name: Stars, dtype: float64

Then we can get a sense of the data in each column

In [15]:
# Summarise each text column (count / unique / top / freq); the cell output
# is a tuple with one describe() result per column, in this order.
tuple(
    file_rated[column].describe()
    for column in ("Brand", "Variety", "Style", "Country")
)

(count       2577
 unique       355
 top       Nissin
 freq         381
 Name: Brand, dtype: object, count     2577
 unique    2410
 top       Beef
 freq         7
 Name: Variety, dtype: object, count     2575
 unique       7
 top       Pack
 freq      1528
 Name: Style, dtype: object, count      2577
 unique       38
 top       Japan
 freq        352
 Name: Country, dtype: object)

One interesting question to ask is which brands are the best rated

In [16]:
# Average star rating per brand (one row per Brand, single "Stars" column).
file_rated.groupby("Brand")[["Stars"]].mean()

Unnamed: 0_level_0,Stars
Brand,Unnamed: 1_level_1
1 To 3 Noodles,4.000000
7 Select,3.750000
7 Select/Nissin,3.500000
A-One,2.750000
A-Sha Dry Noodle,4.067308
...,...
Yum Yum,3.750000
Yum-Mie,3.500000
Zow Zow,3.750000
iMee,3.500000


In [17]:
# Store the per-brand average rating, then display it from best to worst.
file_brand = file_rated.groupby("Brand")[["Stars"]].mean()
file_brand.sort_values("Stars", ascending=False)

Unnamed: 0_level_0,Stars
Brand,Unnamed: 1_level_1
Kimura,5.0
ORee Garden,5.0
The Ramen Rater Select,5.0
Komforte Chockolates,5.0
ChoripDong,5.0
...,...
Dr. McDougall's,0.0
Tiger,0.0
Kim Ve Wong,0.0
Roland,0.0


In [18]:
# Brands whose average rating is exactly 5.0.
file_brand.query("Stars == 5.0")

Unnamed: 0_level_0,Stars
Brand,Unnamed: 1_level_1
ChoripDong,5.0
Daddy,5.0
Daifuku,5.0
Foodmon,5.0
Higashi,5.0
Jackpot Teriyaki,5.0
Kiki Noodle,5.0
Kimura,5.0
Komforte Chockolates,5.0
MyOri,5.0


In [19]:
# Select reviews rated strictly between 4.6 and 4.8 stars (both bounds excluded).
mask = file_rated["Stars"].gt(4.6) & file_rated["Stars"].lt(4.8)
file_rated.loc[mask]

Unnamed: 0,Review #,Brand,Variety,Style,Country,Stars,Top Ten
5,2575,Samyang Foods,Kimchi song Song Ramen,Pack,South Korea,4.75,
23,2557,Yamachan,Sapporo Miso Ramen,Pack,USA,4.75,
27,2553,Nissin,Hakata Ramen Noodle White Tonkotsu,Bowl,Japan,4.75,
48,2532,Nissin,Nippon Onomichi Ramen,Bowl,Japan,4.75,
98,2482,TTL,Chicken With Chinese Shaoxing Wine,Pack,Taiwan,4.75,
...,...,...,...,...,...,...,...
2259,321,Little Cook,TVP Spicy Beef,Bowl,Thailand,4.75,
2332,248,Koka,Tomato,Pack,Singapore,4.75,
2336,244,Nissin,Karashi Mayo Yakisoba,Tray,Japan,4.75,
2400,180,Myojo,Hyobanya No Yakisoba Sauce Flavor,Tray,Japan,4.75,


What about ramen in different countries? (The following material is not required, at least for now.)

In [20]:
# Per-country summary: mean star rating plus a count of rows.
# NOTE: the resulting "Brand" column is the number of non-null Brand entries
# per country (i.e. the number of reviews), NOT the number of distinct brands.
# The sort is part of the chain (no inplace=True), so file_country is built
# in a single expression and the cell can be re-run safely.
# To rank by rating instead, sort by "Stars".
file_country = (
    file_rated[["Country", "Stars", "Brand"]]
    .groupby("Country", as_index=False)
    .agg({'Stars': 'mean', 'Brand': 'count'})
    .sort_values(by="Brand", ascending=False)  # most-reviewed countries first
)
file_country.head()

Unnamed: 0,Country,Stars,Brand
18,Japan,3.981605,352
35,USA,3.457043,323
30,South Korea,3.790554,307
32,Taiwan,3.665402,224
33,Thailand,3.384817,191


In [21]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Dual-axis chart per country: average rating as a line on the primary y-axis
# and the review count as bars on the secondary y-axis.
fig = make_subplots(specs=[[{"secondary_y": True}]])

# Line trace: mean star rating per country.
fig.add_trace(
    go.Scatter(x=file_country["Country"], y=file_country["Stars"], name="Rating"),
    secondary_y=False,
)

# Bar trace: file_country["Brand"] was built with a 'count' aggregation, so it
# is the number of reviews per country (not the number of distinct brands);
# the trace name and axis title are labelled accordingly.
fig.add_trace(
    go.Bar(x=file_country["Country"], y=file_country["Brand"], name="Reviews"),
    secondary_y=True,
)

# Figure title
fig.update_layout(
    title_text="Ramen ratings data visualization demo 1"
)

# x-axis title
fig.update_xaxes(title_text="Country")

# y-axes titles (primary = rating, secondary = review count)
fig.update_yaxes(title_text="Average Stars", secondary_y=False)
fig.update_yaxes(title_text="Number of Reviews", secondary_y=True)

fig.show()

In [22]:
import plotly.express as px

# Scatter of average rating against review count, one labelled point per country.
# file_country["Brand"] comes from a 'count' aggregation, so the x-axis is the
# number of reviews per country rather than the number of distinct brands.
fig2 = px.scatter(
    x=file_country["Brand"],
    y=file_country["Stars"],
    text=file_country["Country"],
    labels={"x": "number of reviews", "y": "average rating"},
    title="Ramen ratings data visualization demo 2",
)
fig2.show()

In [23]:
# Number of rated reviews per brand, listed from most to least frequent.
file_rated['Brand'].value_counts()

Nissin           381
Nongshim          98
Maruchan          76
Mama              71
Paldo             66
                ... 
Golden Wonder      1
Peyang             1
Sanrio             1
China Best         1
Westbrae           1
Name: Brand, Length: 355, dtype: int64

In [24]:
# Per-brand mean ("average") and sum ("total") of the star ratings,
# written with keyword-style named aggregation.
file_rated.groupby('Brand')['Stars'].agg(average='mean', total='sum')

Unnamed: 0_level_0,average,total
Brand,Unnamed: 1_level_1,Unnamed: 2_level_1
1 To 3 Noodles,4.000000,4.00
7 Select,3.750000,7.50
7 Select/Nissin,3.500000,3.50
A-One,2.750000,11.00
A-Sha Dry Noodle,4.067308,105.75
...,...,...
Yum Yum,3.750000,45.00
Yum-Mie,3.500000,3.50
Zow Zow,3.750000,3.75
iMee,3.500000,14.00


In [77]:
# Per-variety mean ("average") and sum ("total") of the star ratings.
top = file_rated.groupby('Variety')['Stars'].agg(average='mean', total='sum')

In [81]:
# The five varieties with the largest summed star total.
top.sort_values(by="total", ascending=False).iloc[:5]

Unnamed: 0_level_0,average,total
Variety,Unnamed: 1_level_1,Unnamed: 2_level_1
Yakisoba,4.583333,27.5
Beef,3.535714,24.75
Chicken,3.107143,21.75
Artificial Chicken,3.416667,20.5
Vegetable,3.125,18.75


In [37]:
# Look up the review(s) for one specific variety by exact name match.
file_rated.loc[
    file_rated['Variety'].eq('Cup-A-Soup Chicken Noodle With White Meat Instant Soup')
]

Unnamed: 0,Review #,Brand,Variety,Style,Country,Stars,Top Ten
36,2544,Lipton,Cup-A-Soup Chicken Noodle With White Meat Inst...,Box,USA,1.5,


In [27]:
# How many rated reviews share each Variety name, most frequent first.
file_rated['Variety'].value_counts()

Beef                                              7
Chicken                                           7
Vegetable                                         6
Artificial Chicken                                6
Yakisoba                                          6
                                                 ..
Veggie Noodle Black Sesame Noodle                 1
Nuudeli Liha Nudlar Kott                          1
Artificial Beef Instant Noodles With Soup Base    1
Otentiq Cheezy Curry Instant Noodles              1
Tom Yum Chili Flavor                              1
Name: Variety, Length: 2410, dtype: int64

In [32]:
# All rated reviews whose Variety is exactly 'Chef Curry Laksa Flavour'.
file_rated.loc[file_rated['Variety'].eq('Chef Curry Laksa Flavour')]

Unnamed: 0,Review #,Brand,Variety,Style,Country,Stars,Top Ten
1221,1359,Mamee,Chef Curry Laksa Flavour,Cup,Malaysia,5.0,
1230,1350,Mamee,Chef Curry Laksa Flavour,Pack,Malaysia,5.0,2014 #7


In [66]:
# Per-variety statistics (mean, sum, count) plus avgXcnt = avg * cnt,
# ordered by sum and then avgXcnt, largest first.
(
    file_rated
    .groupby('Variety')['Stars']
    .agg([('avg', 'mean'), ('sum', 'sum'), ('cnt', 'count')])
    .reset_index()
    .assign(avgXcnt=lambda stats: stats['avg'] * stats['cnt'])
    .sort_values(['sum', 'avgXcnt'], ascending=False)
)

Unnamed: 0,Variety,avg,sum,cnt,avgXcnt
2373,Yakisoba,4.583333,27.50,6,27.50
141,Beef,3.535714,24.75,7,24.75
303,Chicken,3.107143,21.75,7,21.75
71,Artificial Chicken,3.416667,20.50,6,20.50
2321,Vegetable,3.125000,18.75,6,18.75
...,...,...,...,...,...
2090,Spicy Tomato Salsa Ramen,0.000000,0.00,1,0.00
2183,Sweet Potato Instant Noodle Sout-Hot Flavor,0.000000,0.00,1,0.00
2229,Tiny Noodle With Oyster Flavor,0.000000,0.00,1,0.00
2319,Vegan Pad Thai Noodle Soup,0.000000,0.00,1,0.00


In [76]:
# Same per-variety statistics (mean, sum, count, avg * cnt), narrowed to the
# single variety 'Chef Curry Laksa Flavour'.
(
    file_rated
    .groupby('Variety')['Stars']
    .agg([('avg', 'mean'), ('sum', 'sum'), ('cnt', 'count')])
    .reset_index()
    .assign(avgXcnt=lambda stats: stats['avg'] * stats['cnt'])
    .query("Variety=='Chef Curry Laksa Flavour'")
)

Unnamed: 0,Variety,avg,sum,cnt,avgXcnt
295,Chef Curry Laksa Flavour,5.0,10.0,2,10.0
