NUMPY

In [21]:
# Scenario: we manage a restaurant and want to analyse its daily figures
# (meals served, revenue, and other statistics) using NumPy.

import numpy as np

# Meals served on each day, one entry per day.
meals_served = np.array(
    [120, 150, 100, 180, 200, 170, 130],
)
meals_served

array([120, 150, 100, 180, 200, 170, 130])

In [13]:
# Daily revenue in USD, one entry per day.
revenue = np.array(
    [2400, 3000, 2000, 3600, 4000, 3400, 2600],
)
revenue

array([2400, 3000, 2000, 3600, 4000, 3400, 2600])

In [27]:
# Revenue per meal for each day: element-wise division of the two arrays
# (true division, so the result is a float array).
revenue_per_meal = revenue / meals_served
revenue_per_meal

array([20., 20., 20., 20., 20., 20., 20.])

In [33]:
# Add 50 meals to every day's count (e.g., the effect of a new marketing
# campaign). The scalar is applied element-wise and the result is stored in
# a new array; meals_served is not modified.
meals_add = meals_served + 50
meals_add

array([170, 200, 150, 230, 250, 220, 180])

In [39]:
# Subtract 30 meals from every day's count (e.g., a low-season effect).
# Element-wise; the result is a new array.
meals_sub = meals_served - 30
meals_sub

array([ 90, 120,  70, 150, 170, 140, 100])

In [43]:
# Double every day's meal count (e.g., for weekend projections).
# Element-wise; the result is a new array.
meals_mul = meals_served * 2
meals_mul

array([240, 300, 200, 360, 400, 340, 260])

In [47]:
# Halve each day's revenue (element-wise true division, so the result is a
# float array). Note: this yields one halved value per day; it is not a
# half-week total.
revenue_half = revenue / 2
revenue_half

array([1200., 1500., 1000., 1800., 2000., 1700., 1300.])

In [51]:
# Meals served on day 3. Indexing is zero-based, so day 3 lives at index 2.
meals_day3 = meals_served[2]
meals_day3


100

In [53]:
# Meals served from day 2 through day 4 (indices 1 to 3).
# The slice stop is exclusive, hence 1:4 rather than 1:3.
meals_subset = meals_served[1:4]
meals_subset

array([150, 100, 180])

In [55]:
# Revenue on day 5. Indexing is zero-based, so day 5 lives at index 4.
revenue_day5 = revenue[4]
revenue_day5


4000

In [57]:
# Reshape meals served into a column vector (n x 1; here 7 x 1), which is
# useful for reports. Passing -1 lets NumPy infer the row count from the
# array length, so this cell keeps working if the number of days changes.
reshaped_meals = meals_served.reshape(-1, 1)
reshaped_meals

array([[120],
       [150],
       [100],
       [180],
       [200],
       [170],
       [130]])

In [61]:
# Reshape revenue into a column vector (n x 1; here 7 x 1). Passing -1 lets
# NumPy infer the row count from the array length instead of hard-coding 7.
reshaped_revenue = revenue.reshape(-1, 1)
reshaped_revenue

array([[2400],
       [3000],
       [2000],
       [3600],
       [4000],
       [3400],
       [2600]])

In [63]:
# Stack the two 1-D arrays into a single 2-D matrix:
# row 0 holds meals served, row 1 holds revenue, one column per day.
data_matrix = np.array([meals_served, revenue])
data_matrix

array([[ 120,  150,  100,  180,  200,  170,  130],
       [2400, 3000, 2000, 3600, 4000, 3400, 2600]])

In [67]:
# Transpose the matrix to switch rows and columns: each row now describes
# one day as (meals served, revenue).
transposed_data = data_matrix.T
transposed_data


array([[ 120, 2400],
       [ 150, 3000],
       [ 100, 2000],
       [ 180, 3600],
       [ 200, 4000],
       [ 170, 3400],
       [ 130, 2600]])

In [69]:
# Average (arithmetic mean) number of meals served per day.
avg_meals = np.mean(meals_served)
avg_meals

150.0

In [71]:
# Median daily revenue (the middle value once the days are ordered).
median_revenue = np.median(revenue)
median_revenue

3000.0

In [77]:
# Calculate the standard deviation of meals served.
# np.std uses ddof=0 by default, i.e. the population standard deviation;
# pass ddof=1 if the sample estimate is wanted instead.
std_meals = np.std(meals_served)
std_meals

32.95017884191656

In [81]:
# Calculate the variance in revenue.
# np.var uses ddof=0 by default, i.e. the population variance;
# pass ddof=1 if the sample estimate is wanted instead.
variance_revenue = np.var(revenue)
variance_revenue


434285.71428571426

In [85]:
# Staffing flag for each day:
# True means fully staffed, False means partially staffed.
# The boolean dtype is inferred from the literals.
staff_presence = np.array(
    [True, True, False, True, True, False, True],
)
staff_presence

array([ True,  True, False,  True,  True, False,  True])

In [87]:
# Names of the restaurants in the chain, stored as a NumPy string array.
# The fixed-width unicode dtype is inferred from the longest name.
restaurant_names = np.array(
    ["Downtown Diner", "Seaside Eatery", "Mountain Grill"],
)
restaurant_names

array(['Downtown Diner', 'Seaside Eatery', 'Mountain Grill'], dtype='<U14')

In [89]:
# Meal counts for a second week.
meals_next_week = np.array([140, 160, 110, 190, 210, 180, 140])

# Join the two weeks end to end into a single 1-D array
# (first week followed by the second).
meals_two_weeks = np.concatenate((meals_served, meals_next_week))
meals_two_weeks

array([120, 150, 100, 180, 200, 170, 130, 140, 160, 110, 190, 210, 180,
       140])

In [91]:
# Sort meals served in ascending order.
# np.sort returns a sorted copy; meals_served keeps its original day order.
sorted_meals_asc = np.sort(meals_served)
sorted_meals_asc

array([100, 120, 130, 150, 170, 180, 200])

In [93]:
# Sort revenue in descending order: np.sort orders ascending, and the
# [::-1] slice then reverses the result.
sorted_revenue_desc = np.sort(revenue)[::-1]
sorted_revenue_desc

array([4000, 3600, 3400, 3000, 2600, 2400, 2000])

PANDAS

In [104]:
import pandas as pd

# Load the survey data: food choices and preferences of college students.
df = pd.read_csv('food_coded.csv')
df

Unnamed: 0,GPA,Gender,breakfast,calories_chicken,calories_day,calories_scone,coffee,comfort_food,comfort_food_reasons,comfort_food_reasons_coded,...,soup,sports,thai_food,tortilla_calories,turkey_calories,type_sports,veggies_day,vitamins,waffle_calories,weight
0,2.4,2,1,430,,315.0,1,none,we dont have comfort,9.0,...,1.0,1.0,1,1165.0,345,car racing,5,1,1315,187
1,3.654,1,1,610,3.0,420.0,2,"chocolate, chips, ice cream","Stress, bored, anger",1.0,...,1.0,1.0,2,725.0,690,Basketball,4,2,900,155
2,3.3,1,1,720,4.0,420.0,2,"frozen yogurt, pizza, fast food","stress, sadness",1.0,...,1.0,2.0,5,1165.0,500,none,5,1,900,I'm not answering this.
3,3.2,1,1,430,3.0,420.0,2,"Pizza, Mac and cheese, ice cream",Boredom,2.0,...,1.0,2.0,5,725.0,690,,3,1,1315,"Not sure, 240"
4,3.5,1,1,720,2.0,420.0,2,"Ice cream, chocolate, chips","Stress, boredom, cravings",1.0,...,1.0,1.0,4,940.0,500,Softball,4,2,760,190
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
120,3.5,1,1,610,4.0,420.0,2,"wine. mac and cheese, pizza, ice cream",boredom and sadness,,...,1.0,1.0,5,940.0,500,Softball,5,1,1315,156
121,3,1,1,265,2.0,315.0,2,Pizza / Wings / Cheesecake,Loneliness / Homesick / Sadness,,...,1.0,,4,940.0,500,basketball,5,2,1315,180
122,3.882,1,1,720,,420.0,1,"rice, potato, seaweed soup",sadness,,...,1.0,2.0,5,580.0,690,none,4,2,1315,120
123,3,2,1,720,4.0,420.0,1,"Mac n Cheese, Lasagna, Pizza","happiness, they are some of my favorite foods",,...,2.0,2.0,1,940.0,500,,3,1,1315,135


In [106]:
# Dimensions of the table as (rows, columns).
print(df.shape)

(125, 61)


In [110]:
# Data type of every column. Columns reported as `object` (e.g. GPA and
# weight) hold non-numeric entries such as free-text answers and would need
# cleaning before numeric analysis.
print(df.dtypes)

GPA                  object
Gender                int64
breakfast             int64
calories_chicken      int64
calories_day        float64
                     ...   
type_sports          object
veggies_day           int64
vitamins              int64
waffle_calories       int64
weight               object
Length: 61, dtype: object


In [112]:
# Number of missing (NaN) values in each column.
print(df.isnull().sum())

GPA                  2
Gender               0
breakfast            0
calories_chicken     0
calories_day        19
                    ..
type_sports         26
veggies_day          0
vitamins             0
waffle_calories      0
weight               2
Length: 61, dtype: int64


In [114]:
# Keep only the rows that have at least one missing value in any column.
missing_values = df[df.isnull().any(axis=1)]
# End the cell with the frame itself so Jupyter renders it as a table
# instead of the wrapped plain text that print() produces.
missing_values

       GPA  Gender  breakfast  calories_chicken  calories_day  calories_scone  \
0      2.4       2          1               430           NaN           315.0   
3      3.2       1          1               430           3.0           420.0   
5     2.25       1          1               610           3.0           980.0   
8      3.3       1          1               430           NaN           420.0   
15     NaN       2          2               430           NaN           980.0   
..     ...     ...        ...               ...           ...             ...   
120    3.5       1          1               610           4.0           420.0   
121      3       1          1               265           2.0           315.0   
122  3.882       1          1               720           NaN           420.0   
123      3       2          1               720           4.0           420.0   
124    3.9       1          1               430           NaN           315.0   

     coffee                

In [116]:
# Summary statistics (count, mean, std, min, quartiles, max).
# describe() covers only numeric columns by default, so object-dtype columns
# such as GPA and weight do not appear here.
# The bare expression lets Jupyter render the result as a table instead of
# the wrapped plain text that print() produces.
df.describe()

           Gender   breakfast  calories_chicken  calories_day  calories_scone  \
count  125.000000  125.000000        125.000000    106.000000      124.000000   
mean     1.392000    1.112000        577.320000      3.028302      505.241935   
std      0.490161    0.316636        131.214156      0.639308      230.840506   
min      1.000000    1.000000        265.000000      2.000000      315.000000   
25%      1.000000    1.000000        430.000000      3.000000      420.000000   
50%      1.000000    1.000000        610.000000      3.000000      420.000000   
75%      2.000000    1.000000        720.000000      3.000000      420.000000   
max      2.000000    2.000000        720.000000      4.000000      980.000000   

          coffee  comfort_food_reasons_coded        cook  \
count  125.00000                  106.000000  122.000000   
mean     1.75200                    2.698113    2.786885   
std      0.43359                    1.972042    1.038351   
min      1.00000              