diff --git a/01_Getting_&_Knowing_Your_Data/Chipotle/Exercises_solved.ipynb b/01_Getting_&_Knowing_Your_Data/Chipotle/Exercises_solved.ipynb
new file mode 100644
index 000000000..10fee43b5
--- /dev/null
+++ b/01_Getting_&_Knowing_Your_Data/Chipotle/Exercises_solved.ipynb
@@ -0,0 +1,787 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Ex2 - Getting and Knowing your Data"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "This time we are going to pull data directly from the internet.\n",
+ "Special thanks to: https://github.com/justmarkham for sharing the dataset and materials.\n",
+ "\n",
+ "### Step 1. Import the necessary libraries"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import pandas as pd"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Step 2. Import the dataset from this [address](https://raw.githubusercontent.com/justmarkham/DAT8/master/data/chipotle.tsv). "
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Step 3. Assign it to a variable called chipo."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# The file is tab-separated (TSV), so tell read_csv to split on tabs\n",
+ "# by passing sep=\"\\t\".\n",
+ "chipo = pd.read_csv(\"https://raw.githubusercontent.com/justmarkham/DAT8/master/data/chipotle.tsv\", sep=\"\\t\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Step 4. See the first 10 entries"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {
+ "scrolled": false
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " order_id | \n",
+ " quantity | \n",
+ " item_name | \n",
+ " choice_description | \n",
+ " item_price | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " Chips and Fresh Tomato Salsa | \n",
+ " NaN | \n",
+ " $2.39 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " Izze | \n",
+ " [Clementine] | \n",
+ " $3.39 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " Nantucket Nectar | \n",
+ " [Apple] | \n",
+ " $3.39 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " Chips and Tomatillo-Green Chili Salsa | \n",
+ " NaN | \n",
+ " $2.39 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 2 | \n",
+ " 2 | \n",
+ " Chicken Bowl | \n",
+ " [Tomatillo-Red Chili Salsa (Hot), [Black Beans... | \n",
+ " $16.98 | \n",
+ "
\n",
+ " \n",
+ " | 5 | \n",
+ " 3 | \n",
+ " 1 | \n",
+ " Chicken Bowl | \n",
+ " [Fresh Tomato Salsa (Mild), [Rice, Cheese, Sou... | \n",
+ " $10.98 | \n",
+ "
\n",
+ " \n",
+ " | 6 | \n",
+ " 3 | \n",
+ " 1 | \n",
+ " Side of Chips | \n",
+ " NaN | \n",
+ " $1.69 | \n",
+ "
\n",
+ " \n",
+ " | 7 | \n",
+ " 4 | \n",
+ " 1 | \n",
+ " Steak Burrito | \n",
+ " [Tomatillo Red Chili Salsa, [Fajita Vegetables... | \n",
+ " $11.75 | \n",
+ "
\n",
+ " \n",
+ " | 8 | \n",
+ " 4 | \n",
+ " 1 | \n",
+ " Steak Soft Tacos | \n",
+ " [Tomatillo Green Chili Salsa, [Pinto Beans, Ch... | \n",
+ " $9.25 | \n",
+ "
\n",
+ " \n",
+ " | 9 | \n",
+ " 5 | \n",
+ " 1 | \n",
+ " Steak Burrito | \n",
+ " [Fresh Tomato Salsa, [Rice, Black Beans, Pinto... | \n",
+ " $9.25 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " order_id quantity item_name \\\n",
+ "0 1 1 Chips and Fresh Tomato Salsa \n",
+ "1 1 1 Izze \n",
+ "2 1 1 Nantucket Nectar \n",
+ "3 1 1 Chips and Tomatillo-Green Chili Salsa \n",
+ "4 2 2 Chicken Bowl \n",
+ "5 3 1 Chicken Bowl \n",
+ "6 3 1 Side of Chips \n",
+ "7 4 1 Steak Burrito \n",
+ "8 4 1 Steak Soft Tacos \n",
+ "9 5 1 Steak Burrito \n",
+ "\n",
+ " choice_description item_price \n",
+ "0 NaN $2.39 \n",
+ "1 [Clementine] $3.39 \n",
+ "2 [Apple] $3.39 \n",
+ "3 NaN $2.39 \n",
+ "4 [Tomatillo-Red Chili Salsa (Hot), [Black Beans... $16.98 \n",
+ "5 [Fresh Tomato Salsa (Mild), [Rice, Cheese, Sou... $10.98 \n",
+ "6 NaN $1.69 \n",
+ "7 [Tomatillo Red Chili Salsa, [Fajita Vegetables... $11.75 \n",
+ "8 [Tomatillo Green Chili Salsa, [Pinto Beans, Ch... $9.25 \n",
+ "9 [Fresh Tomato Salsa, [Rice, Black Beans, Pinto... $9.25 "
+ ]
+ },
+ "execution_count": 6,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "chipo.head(10)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Step 5. What is the number of observations in the dataset?"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "4622"
+ ]
+ },
+ "execution_count": 13,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Solution 1\n",
+ "chipo.shape[0]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "4622"
+ ]
+ },
+ "execution_count": 14,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Solution 2\n",
+ "len(chipo)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Step 6. What is the number of columns in the dataset?"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "5"
+ ]
+ },
+ "execution_count": 9,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "chipo.shape[1]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Step 7. Print the name of all the columns."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Index(['order_id', 'quantity', 'item_name', 'choice_description',\n",
+ " 'item_price'],\n",
+ " dtype='object')"
+ ]
+ },
+ "execution_count": 10,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "chipo.columns"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Step 8. How is the dataset indexed?"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "RangeIndex(start=0, stop=4622, step=1)"
+ ]
+ },
+ "execution_count": 15,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# integer in range(0, 4622)\n",
+ "chipo.index"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Step 9. Which was the most-ordered item? "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 33,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "item_name\n",
+ "Chicken Bowl 761\n",
+ "Chicken Burrito 591\n",
+ "Chips and Guacamole 506\n",
+ "Steak Burrito 386\n",
+ "Canned Soft Drink 351\n",
+ "Chips 230\n",
+ "Steak Bowl 221\n",
+ "Bottled Water 211\n",
+ "Chips and Fresh Tomato Salsa 130\n",
+ "Canned Soda 126\n",
+ "Chicken Salad Bowl 123\n",
+ "Chicken Soft Tacos 120\n",
+ "Side of Chips 110\n",
+ "Veggie Burrito 97\n",
+ "Barbacoa Burrito 91\n",
+ "Veggie Bowl 87\n",
+ "Carnitas Bowl 71\n",
+ "Barbacoa Bowl 66\n",
+ "Carnitas Burrito 60\n",
+ "Steak Soft Tacos 56\n",
+ "6 Pack Soft Drink 55\n",
+ "Chips and Tomatillo Red Chili Salsa 50\n",
+ "Chicken Crispy Tacos 50\n",
+ "Chips and Tomatillo Green Chili Salsa 45\n",
+ "Carnitas Soft Tacos 40\n",
+ "Steak Crispy Tacos 36\n",
+ "Chips and Tomatillo-Green Chili Salsa 33\n",
+ "Steak Salad Bowl 31\n",
+ "Nantucket Nectar 29\n",
+ "Chips and Tomatillo-Red Chili Salsa 25\n",
+ "Barbacoa Soft Tacos 25\n",
+ "Chips and Roasted Chili Corn Salsa 23\n",
+ "Izze 20\n",
+ "Veggie Salad Bowl 18\n",
+ "Chips and Roasted Chili-Corn Salsa 18\n",
+ "Barbacoa Crispy Tacos 12\n",
+ "Barbacoa Salad Bowl 10\n",
+ "Chicken Salad 9\n",
+ "Carnitas Crispy Tacos 8\n",
+ "Veggie Soft Tacos 8\n",
+ "Burrito 6\n",
+ "Veggie Salad 6\n",
+ "Carnitas Salad Bowl 6\n",
+ "Bowl 4\n",
+ "Steak Salad 4\n",
+ "Salad 2\n",
+ "Crispy Tacos 2\n",
+ "Chips and Mild Fresh Tomato Salsa 1\n",
+ "Carnitas Salad 1\n",
+ "Veggie Crispy Tacos 1\n",
+ "Name: quantity, dtype: int64"
+ ]
+ },
+ "execution_count": 33,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Total quantity ordered per item, largest first.\n",
+ "# sort_values returns a new Series, so the cell gives the same result on every re-run.\n",
+ "grouped_chipo = chipo.groupby(\"item_name\")[\"quantity\"].sum().sort_values(ascending=False)\n",
+ "grouped_chipo"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 32,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "'Chicken Bowl'"
+ ]
+ },
+ "execution_count": 32,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "grouped_chipo.index[0]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Step 10. For the most-ordered item, how many items were ordered?"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 34,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "761"
+ ]
+ },
+ "execution_count": 34,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# grouped_chipo is indexed by item name, so select the first row by position.\n",
+ "# (An integer key in plain [] on a label-indexed Series is deprecated in recent pandas.)\n",
+ "grouped_chipo.iloc[0]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Step 11. What was the most ordered item in the choice_description column?"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 48,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "choice_description\n",
+ "[Diet Coke] 134\n",
+ "[Coke] 123\n",
+ "[Sprite] 77\n",
+ "[Fresh Tomato Salsa, [Rice, Black Beans, Cheese, Sour Cream, Lettuce]] 42\n",
+ "[Fresh Tomato Salsa, [Rice, Black Beans, Cheese, Sour Cream, Guacamole, Lettuce]] 40\n",
+ " ... \n",
+ "[Roasted Chili Corn Salsa, [Fajita Vegetables, Rice, Cheese, Guacamole, Lettuce]] 1\n",
+ "[Roasted Chili Corn Salsa, [Fajita Vegetables, Rice, Cheese, Sour Cream, Guacamole]] 1\n",
+ "[Roasted Chili Corn Salsa, [Fajita Vegetables, Rice, Guacamole, Lettuce]] 1\n",
+ "[Roasted Chili Corn Salsa, [Fajita Vegetables, Rice, Guacamole]] 1\n",
+ "[[Tomatillo-Red Chili Salsa (Hot), Tomatillo-Green Chili Salsa (Medium)], [Rice, Pinto Beans, Fajita Veggies, Lettuce]] 1\n",
+ "Name: choice_description, Length: 1043, dtype: int64"
+ ]
+ },
+ "execution_count": 48,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Number of rows recorded for each choice_description, largest first.\n",
+ "# sort_values returns a new Series, so the cell gives the same result on every re-run.\n",
+ "grouped_by_desc = chipo.groupby(\"choice_description\")[\"choice_description\"].count().sort_values(ascending=False)\n",
+ "grouped_by_desc"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 49,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "'[Diet Coke]'"
+ ]
+ },
+ "execution_count": 49,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "grouped_by_desc.index[0]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Step 12. How many items were ordered in total?"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 79,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "4972"
+ ]
+ },
+ "execution_count": 79,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Series.sum() is vectorized; the builtin sum() would iterate the column in Python.\n",
+ "chipo[\"quantity\"].sum()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Step 13. Turn the item price into a float"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "#### Step 13.a. Check the item price type"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 55,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "dtype('O')"
+ ]
+ },
+ "execution_count": 55,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "chipo[\"item_price\"].dtype"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "#### Step 13.b. Create a lambda function and change the type of item price"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 60,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Prices arrive as strings with a leading currency symbol (e.g. \"$2.39\").\n",
+ "# Going through str() and stripping the \"$\" keeps this cell safe to re-run\n",
+ "# after the column already holds floats (slicing x[1:] would raise on a float).\n",
+ "chipo[\"item_price\"] = chipo[\"item_price\"].apply(lambda x: float(str(x).lstrip(\"$\")))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "#### Step 13.c. Check the item price type"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 62,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "dtype('float64')"
+ ]
+ },
+ "execution_count": 62,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "chipo[\"item_price\"].dtype"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Step 14. How much was the revenue for the period in the dataset?"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 69,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "39237.02"
+ ]
+ },
+ "execution_count": 69,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Revenue = sum over rows of item_price * quantity; the product is kept as a\n",
+ "# new total_price column on chipo.\n",
+ "# NOTE(review): head() shows a quantity-2 Chicken Bowl at $16.98 next to a\n",
+ "# quantity-1 Chicken Bowl at $10.98, so item_price may already be the line total.\n",
+ "# If so, multiplying by quantity overstates revenue — confirm against the data source.\n",
+ "chipo[\"total_price\"] = chipo[\"item_price\"] * chipo[\"quantity\"]\n",
+ "chipo[\"total_price\"].sum()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Step 15. How many orders were made in the period?"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 85,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "1834"
+ ]
+ },
+ "execution_count": 85,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# An order_id repeats once per line item, so count the distinct ids\n",
+ "# rather than the rows.\n",
+ "chipo[\"order_id\"].nunique()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Step 16. What is the average revenue amount per order?"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 89,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "21.394231188658654"
+ ]
+ },
+ "execution_count": 89,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Solution 1\n",
+ "# Sum the line totals within each order, then average those per-order totals.\n",
+ "group_by_id = chipo.groupby(\"order_id\")[\"total_price\"].sum()\n",
+ "group_by_id.mean()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Solution 2\n",
+ "# Total revenue divided by the number of distinct orders.\n",
+ "chipo[\"total_price\"].sum() / chipo[\"order_id\"].nunique()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Step 17. How many different items are sold?"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 75,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "50"
+ ]
+ },
+ "execution_count": 75,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "chipo[\"item_name\"].nunique()"
+ ]
+ }
+ ],
+ "metadata": {
+ "anaconda-cloud": {},
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.10.0"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 1
+}
diff --git a/01_Getting_&_Knowing_Your_Data/Occupation/Exercisess_solved.ipynb b/01_Getting_&_Knowing_Your_Data/Occupation/Exercisess_solved.ipynb
new file mode 100644
index 000000000..db8c76192
--- /dev/null
+++ b/01_Getting_&_Knowing_Your_Data/Occupation/Exercisess_solved.ipynb
@@ -0,0 +1,1057 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Ex3 - Getting and Knowing your Data"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "This time we are going to pull data directly from the internet.\n",
+ "Special thanks to: https://github.com/justmarkham for sharing the dataset and materials.\n",
+ "\n",
+ "### Step 1. Import the necessary libraries"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import pandas as pd"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Step 2. Import the dataset from this [address](https://raw.githubusercontent.com/justmarkham/DAT8/master/data/u.user). "
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Step 3. Assign it to a variable called users and use the 'user_id' as index"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# The file is pipe-delimited, hence sep=\"|\".\n",
+ "# NOTE(review): Step 3 asks for a variable called `users` indexed by 'user_id';\n",
+ "# this cell binds the frame to the name `user_id` and keeps the default RangeIndex.\n",
+ "# Every later cell and saved output relies on that, so rename and re-run them together.\n",
+ "user_id = pd.read_csv(\"https://raw.githubusercontent.com/justmarkham/DAT8/master/data/u.user\", sep=\"|\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Step 4. See the first 25 entries"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {
+ "scrolled": false
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " user_id | \n",
+ " age | \n",
+ " gender | \n",
+ " occupation | \n",
+ " zip_code | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 1 | \n",
+ " 24 | \n",
+ " M | \n",
+ " technician | \n",
+ " 85711 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 2 | \n",
+ " 53 | \n",
+ " F | \n",
+ " other | \n",
+ " 94043 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 3 | \n",
+ " 23 | \n",
+ " M | \n",
+ " writer | \n",
+ " 32067 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 4 | \n",
+ " 24 | \n",
+ " M | \n",
+ " technician | \n",
+ " 43537 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 5 | \n",
+ " 33 | \n",
+ " F | \n",
+ " other | \n",
+ " 15213 | \n",
+ "
\n",
+ " \n",
+ " | 5 | \n",
+ " 6 | \n",
+ " 42 | \n",
+ " M | \n",
+ " executive | \n",
+ " 98101 | \n",
+ "
\n",
+ " \n",
+ " | 6 | \n",
+ " 7 | \n",
+ " 57 | \n",
+ " M | \n",
+ " administrator | \n",
+ " 91344 | \n",
+ "
\n",
+ " \n",
+ " | 7 | \n",
+ " 8 | \n",
+ " 36 | \n",
+ " M | \n",
+ " administrator | \n",
+ " 05201 | \n",
+ "
\n",
+ " \n",
+ " | 8 | \n",
+ " 9 | \n",
+ " 29 | \n",
+ " M | \n",
+ " student | \n",
+ " 01002 | \n",
+ "
\n",
+ " \n",
+ " | 9 | \n",
+ " 10 | \n",
+ " 53 | \n",
+ " M | \n",
+ " lawyer | \n",
+ " 90703 | \n",
+ "
\n",
+ " \n",
+ " | 10 | \n",
+ " 11 | \n",
+ " 39 | \n",
+ " F | \n",
+ " other | \n",
+ " 30329 | \n",
+ "
\n",
+ " \n",
+ " | 11 | \n",
+ " 12 | \n",
+ " 28 | \n",
+ " F | \n",
+ " other | \n",
+ " 06405 | \n",
+ "
\n",
+ " \n",
+ " | 12 | \n",
+ " 13 | \n",
+ " 47 | \n",
+ " M | \n",
+ " educator | \n",
+ " 29206 | \n",
+ "
\n",
+ " \n",
+ " | 13 | \n",
+ " 14 | \n",
+ " 45 | \n",
+ " M | \n",
+ " scientist | \n",
+ " 55106 | \n",
+ "
\n",
+ " \n",
+ " | 14 | \n",
+ " 15 | \n",
+ " 49 | \n",
+ " F | \n",
+ " educator | \n",
+ " 97301 | \n",
+ "
\n",
+ " \n",
+ " | 15 | \n",
+ " 16 | \n",
+ " 21 | \n",
+ " M | \n",
+ " entertainment | \n",
+ " 10309 | \n",
+ "
\n",
+ " \n",
+ " | 16 | \n",
+ " 17 | \n",
+ " 30 | \n",
+ " M | \n",
+ " programmer | \n",
+ " 06355 | \n",
+ "
\n",
+ " \n",
+ " | 17 | \n",
+ " 18 | \n",
+ " 35 | \n",
+ " F | \n",
+ " other | \n",
+ " 37212 | \n",
+ "
\n",
+ " \n",
+ " | 18 | \n",
+ " 19 | \n",
+ " 40 | \n",
+ " M | \n",
+ " librarian | \n",
+ " 02138 | \n",
+ "
\n",
+ " \n",
+ " | 19 | \n",
+ " 20 | \n",
+ " 42 | \n",
+ " F | \n",
+ " homemaker | \n",
+ " 95660 | \n",
+ "
\n",
+ " \n",
+ " | 20 | \n",
+ " 21 | \n",
+ " 26 | \n",
+ " M | \n",
+ " writer | \n",
+ " 30068 | \n",
+ "
\n",
+ " \n",
+ " | 21 | \n",
+ " 22 | \n",
+ " 25 | \n",
+ " M | \n",
+ " writer | \n",
+ " 40206 | \n",
+ "
\n",
+ " \n",
+ " | 22 | \n",
+ " 23 | \n",
+ " 30 | \n",
+ " F | \n",
+ " artist | \n",
+ " 48197 | \n",
+ "
\n",
+ " \n",
+ " | 23 | \n",
+ " 24 | \n",
+ " 21 | \n",
+ " F | \n",
+ " artist | \n",
+ " 94533 | \n",
+ "
\n",
+ " \n",
+ " | 24 | \n",
+ " 25 | \n",
+ " 39 | \n",
+ " M | \n",
+ " engineer | \n",
+ " 55107 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " user_id age gender occupation zip_code\n",
+ "0 1 24 M technician 85711\n",
+ "1 2 53 F other 94043\n",
+ "2 3 23 M writer 32067\n",
+ "3 4 24 M technician 43537\n",
+ "4 5 33 F other 15213\n",
+ "5 6 42 M executive 98101\n",
+ "6 7 57 M administrator 91344\n",
+ "7 8 36 M administrator 05201\n",
+ "8 9 29 M student 01002\n",
+ "9 10 53 M lawyer 90703\n",
+ "10 11 39 F other 30329\n",
+ "11 12 28 F other 06405\n",
+ "12 13 47 M educator 29206\n",
+ "13 14 45 M scientist 55106\n",
+ "14 15 49 F educator 97301\n",
+ "15 16 21 M entertainment 10309\n",
+ "16 17 30 M programmer 06355\n",
+ "17 18 35 F other 37212\n",
+ "18 19 40 M librarian 02138\n",
+ "19 20 42 F homemaker 95660\n",
+ "20 21 26 M writer 30068\n",
+ "21 22 25 M writer 40206\n",
+ "22 23 30 F artist 48197\n",
+ "23 24 21 F artist 94533\n",
+ "24 25 39 M engineer 55107"
+ ]
+ },
+ "execution_count": 4,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "user_id.head(25)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Step 5. See the last 10 entries"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " user_id | \n",
+ " age | \n",
+ " gender | \n",
+ " occupation | \n",
+ " zip_code | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 933 | \n",
+ " 934 | \n",
+ " 61 | \n",
+ " M | \n",
+ " engineer | \n",
+ " 22902 | \n",
+ "
\n",
+ " \n",
+ " | 934 | \n",
+ " 935 | \n",
+ " 42 | \n",
+ " M | \n",
+ " doctor | \n",
+ " 66221 | \n",
+ "
\n",
+ " \n",
+ " | 935 | \n",
+ " 936 | \n",
+ " 24 | \n",
+ " M | \n",
+ " other | \n",
+ " 32789 | \n",
+ "
\n",
+ " \n",
+ " | 936 | \n",
+ " 937 | \n",
+ " 48 | \n",
+ " M | \n",
+ " educator | \n",
+ " 98072 | \n",
+ "
\n",
+ " \n",
+ " | 937 | \n",
+ " 938 | \n",
+ " 38 | \n",
+ " F | \n",
+ " technician | \n",
+ " 55038 | \n",
+ "
\n",
+ " \n",
+ " | 938 | \n",
+ " 939 | \n",
+ " 26 | \n",
+ " F | \n",
+ " student | \n",
+ " 33319 | \n",
+ "
\n",
+ " \n",
+ " | 939 | \n",
+ " 940 | \n",
+ " 32 | \n",
+ " M | \n",
+ " administrator | \n",
+ " 02215 | \n",
+ "
\n",
+ " \n",
+ " | 940 | \n",
+ " 941 | \n",
+ " 20 | \n",
+ " M | \n",
+ " student | \n",
+ " 97229 | \n",
+ "
\n",
+ " \n",
+ " | 941 | \n",
+ " 942 | \n",
+ " 48 | \n",
+ " F | \n",
+ " librarian | \n",
+ " 78209 | \n",
+ "
\n",
+ " \n",
+ " | 942 | \n",
+ " 943 | \n",
+ " 22 | \n",
+ " M | \n",
+ " student | \n",
+ " 77841 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " user_id age gender occupation zip_code\n",
+ "933 934 61 M engineer 22902\n",
+ "934 935 42 M doctor 66221\n",
+ "935 936 24 M other 32789\n",
+ "936 937 48 M educator 98072\n",
+ "937 938 38 F technician 55038\n",
+ "938 939 26 F student 33319\n",
+ "939 940 32 M administrator 02215\n",
+ "940 941 20 M student 97229\n",
+ "941 942 48 F librarian 78209\n",
+ "942 943 22 M student 77841"
+ ]
+ },
+ "execution_count": 5,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "user_id.tail(10)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Step 6. What is the number of observations in the dataset?"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "943"
+ ]
+ },
+ "execution_count": 7,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "user_id.shape[0]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Step 7. What is the number of columns in the dataset?"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "5"
+ ]
+ },
+ "execution_count": 9,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "user_id.shape[1]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Step 8. Print the name of all the columns."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Index(['user_id', 'age', 'gender', 'occupation', 'zip_code'], dtype='object')"
+ ]
+ },
+ "execution_count": 10,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "user_id.columns"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Step 9. How is the dataset indexed?"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "RangeIndex(start=0, stop=943, step=1)"
+ ]
+ },
+ "execution_count": 11,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# in range(0, 943)\n",
+ "user_id.index"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Step 10. What is the data type of each column?"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "RangeIndex: 943 entries, 0 to 942\n",
+ "Data columns (total 5 columns):\n",
+ " # Column Non-Null Count Dtype \n",
+ "--- ------ -------------- ----- \n",
+ " 0 user_id 943 non-null int64 \n",
+ " 1 age 943 non-null int64 \n",
+ " 2 gender 943 non-null object\n",
+ " 3 occupation 943 non-null object\n",
+ " 4 zip_code 943 non-null object\n",
+ "dtypes: int64(2), object(3)\n",
+ "memory usage: 37.0+ KB\n"
+ ]
+ }
+ ],
+ "source": [
+ "user_id.info()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Step 11. Print only the occupation column"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "0 technician\n",
+ "1 other\n",
+ "2 writer\n",
+ "3 technician\n",
+ "4 other\n",
+ " ... \n",
+ "938 student\n",
+ "939 administrator\n",
+ "940 student\n",
+ "941 librarian\n",
+ "942 student\n",
+ "Name: occupation, Length: 943, dtype: object"
+ ]
+ },
+ "execution_count": 13,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "user_id[\"occupation\"]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Step 12. How many different occupations are in this dataset?"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "21"
+ ]
+ },
+ "execution_count": 14,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "user_id[\"occupation\"].nunique()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Step 13. What is the most frequent occupation?"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 38,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "'student'"
+ ]
+ },
+ "execution_count": 38,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# value_counts() tallies each occupation; idxmax() returns the label with the highest tally.\n",
+ "user_id[\"occupation\"].value_counts().idxmax()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Step 14. Summarize the DataFrame."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " user_id | \n",
+ " age | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | count | \n",
+ " 943.000000 | \n",
+ " 943.000000 | \n",
+ "
\n",
+ " \n",
+ " | mean | \n",
+ " 472.000000 | \n",
+ " 34.051962 | \n",
+ "
\n",
+ " \n",
+ " | std | \n",
+ " 272.364951 | \n",
+ " 12.192740 | \n",
+ "
\n",
+ " \n",
+ " | min | \n",
+ " 1.000000 | \n",
+ " 7.000000 | \n",
+ "
\n",
+ " \n",
+ " | 25% | \n",
+ " 236.500000 | \n",
+ " 25.000000 | \n",
+ "
\n",
+ " \n",
+ " | 50% | \n",
+ " 472.000000 | \n",
+ " 31.000000 | \n",
+ "
\n",
+ " \n",
+ " | 75% | \n",
+ " 707.500000 | \n",
+ " 43.000000 | \n",
+ "
\n",
+ " \n",
+ " | max | \n",
+ " 943.000000 | \n",
+ " 73.000000 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " user_id age\n",
+ "count 943.000000 943.000000\n",
+ "mean 472.000000 34.051962\n",
+ "std 272.364951 12.192740\n",
+ "min 1.000000 7.000000\n",
+ "25% 236.500000 25.000000\n",
+ "50% 472.000000 31.000000\n",
+ "75% 707.500000 43.000000\n",
+ "max 943.000000 73.000000"
+ ]
+ },
+ "execution_count": 18,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "user_id.describe()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Step 15. Summarize all the columns"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 23,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " gender | \n",
+ " occupation | \n",
+ " zip_code | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | count | \n",
+ " 943 | \n",
+ " 943 | \n",
+ " 943 | \n",
+ "
\n",
+ " \n",
+ " | unique | \n",
+ " 2 | \n",
+ " 21 | \n",
+ " 795 | \n",
+ "
\n",
+ " \n",
+ " | top | \n",
+ " M | \n",
+ " student | \n",
+ " 55414 | \n",
+ "
\n",
+ " \n",
+ " | freq | \n",
+ " 670 | \n",
+ " 196 | \n",
+ " 9 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " gender occupation zip_code\n",
+ "count 943 943 943\n",
+ "unique 2 21 795\n",
+ "top M student 55414\n",
+ "freq 670 196 9"
+ ]
+ },
+ "execution_count": 23,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Summarize the object-dtype columns (gender, occupation, zip_code).\n",
+ "# NOTE(review): the step says \"all the columns\", but the numeric columns are\n",
+ "# left out here; describe(include=\"all\") would cover every column in one table —\n",
+ "# TODO confirm which is intended.\n",
+ "obj_cols = user_id.select_dtypes(include=\"object\")\n",
+ "obj_cols.describe()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Step 16. Summarize only the occupation column"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 25,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "count 943\n",
+ "unique 21\n",
+ "top student\n",
+ "freq 196\n",
+ "Name: occupation, dtype: object"
+ ]
+ },
+ "execution_count": 25,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "user_id[\"occupation\"].describe()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Step 17. What is the mean age of users?"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 26,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "34.05196182396607"
+ ]
+ },
+ "execution_count": 26,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "user_id[\"age\"].mean()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Step 18. What is the age with least occurrence?"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 42,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "30 39\n",
+ "25 38\n",
+ "22 37\n",
+ "28 36\n",
+ "27 35\n",
+ " ..\n",
+ "7 1\n",
+ "66 1\n",
+ "11 1\n",
+ "10 1\n",
+ "73 1\n",
+ "Name: age, Length: 61, dtype: int64"
+ ]
+ },
+ "execution_count": 42,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "user_id.age.value_counts()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 43,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "7 1\n",
+ "66 1\n",
+ "11 1\n",
+ "10 1\n",
+ "73 1\n",
+ "Name: age, dtype: int64"
+ ]
+ },
+ "execution_count": 43,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "user_id.age.value_counts().tail()"
+ ]
+ }
+ ],
+ "metadata": {
+ "anaconda-cloud": {},
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.10.0"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 1
+}
diff --git a/01_Getting_&_Knowing_Your_Data/World Food Facts/Exercises_solved.ipynb b/01_Getting_&_Knowing_Your_Data/World Food Facts/Exercises_solved.ipynb
new file mode 100644
index 000000000..0b2e77779
--- /dev/null
+++ b/01_Getting_&_Knowing_Your_Data/World Food Facts/Exercises_solved.ipynb
@@ -0,0 +1,563 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Exercise 1"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Step 1. Go to https://www.kaggle.com/openfoodfacts/world-food-facts/data"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Step 2. Download the dataset to your computer and unzip it."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Step 3. Use the tsv file and assign it to a dataframe called food"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Please provide your Kaggle credentials to download this dataset. Learn more: http://bit.ly/kaggle-creds\n",
+ "Your Kaggle username: \n",
+ "Your Kaggle username: \n",
+ "Your Kaggle username: chewziqing\n",
+ "Your Kaggle Key: ········\n",
+ "Downloading world-food-facts.zip to .\\world-food-facts\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "100%|███████████████████████████████████████████████████████████████████████████████| 109M/109M [01:27<00:00, 1.30MB/s]\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n"
+ ]
+ }
+ ],
+ "source": [
+ "import pandas as pd\n",
+ "import opendatasets as od\n",
+ "od.download(\"https://www.kaggle.com/openfoodfacts/world-food-facts/data\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "C:\\Users\\USER\\AppData\\Local\\Temp\\ipykernel_12124\\2931642990.py:5: DtypeWarning: Columns (0,3,5,19,20,24,25,26,27,28,36,37,38,39,48) have mixed types. Specify dtype option on import or set low_memory=False.\n",
+ " df = pd.read_csv(path, sep=\"\\t\")\n"
+ ]
+ }
+ ],
+ "source": [
+ "import os\n",
+ "\n",
+ "path = \"world-food-facts/en.openfoodfacts.org.products.tsv\"\n",
+ "os.listdir(\"./world-food-facts/\")\n",
+ "df = pd.read_csv(path, sep=\"\\t\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Step 4. See the first 5 entries"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " code | \n",
+ " url | \n",
+ " creator | \n",
+ " created_t | \n",
+ " created_datetime | \n",
+ " last_modified_t | \n",
+ " last_modified_datetime | \n",
+ " product_name | \n",
+ " generic_name | \n",
+ " quantity | \n",
+ " ... | \n",
+ " fruits-vegetables-nuts_100g | \n",
+ " fruits-vegetables-nuts-estimate_100g | \n",
+ " collagen-meat-protein-ratio_100g | \n",
+ " cocoa_100g | \n",
+ " chlorophyl_100g | \n",
+ " carbon-footprint_100g | \n",
+ " nutrition-score-fr_100g | \n",
+ " nutrition-score-uk_100g | \n",
+ " glycemic-index_100g | \n",
+ " water-hardness_100g | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 3087 | \n",
+ " http://world-en.openfoodfacts.org/product/0000... | \n",
+ " openfoodfacts-contributors | \n",
+ " 1474103866 | \n",
+ " 2016-09-17T09:17:46Z | \n",
+ " 1474103893 | \n",
+ " 2016-09-17T09:18:13Z | \n",
+ " Farine de blé noir | \n",
+ " NaN | \n",
+ " 1kg | \n",
+ " ... | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 4530 | \n",
+ " http://world-en.openfoodfacts.org/product/0000... | \n",
+ " usda-ndb-import | \n",
+ " 1489069957 | \n",
+ " 2017-03-09T14:32:37Z | \n",
+ " 1489069957 | \n",
+ " 2017-03-09T14:32:37Z | \n",
+ " Banana Chips Sweetened (Whole) | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " ... | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 14.0 | \n",
+ " 14.0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 4559 | \n",
+ " http://world-en.openfoodfacts.org/product/0000... | \n",
+ " usda-ndb-import | \n",
+ " 1489069957 | \n",
+ " 2017-03-09T14:32:37Z | \n",
+ " 1489069957 | \n",
+ " 2017-03-09T14:32:37Z | \n",
+ " Peanuts | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " ... | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 16087 | \n",
+ " http://world-en.openfoodfacts.org/product/0000... | \n",
+ " usda-ndb-import | \n",
+ " 1489055731 | \n",
+ " 2017-03-09T10:35:31Z | \n",
+ " 1489055731 | \n",
+ " 2017-03-09T10:35:31Z | \n",
+ " Organic Salted Nut Mix | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " ... | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 12.0 | \n",
+ " 12.0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 16094 | \n",
+ " http://world-en.openfoodfacts.org/product/0000... | \n",
+ " usda-ndb-import | \n",
+ " 1489055653 | \n",
+ " 2017-03-09T10:34:13Z | \n",
+ " 1489055653 | \n",
+ " 2017-03-09T10:34:13Z | \n",
+ " Organic Polenta | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " ... | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
5 rows × 163 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " code url \\\n",
+ "0 3087 http://world-en.openfoodfacts.org/product/0000... \n",
+ "1 4530 http://world-en.openfoodfacts.org/product/0000... \n",
+ "2 4559 http://world-en.openfoodfacts.org/product/0000... \n",
+ "3 16087 http://world-en.openfoodfacts.org/product/0000... \n",
+ "4 16094 http://world-en.openfoodfacts.org/product/0000... \n",
+ "\n",
+ " creator created_t created_datetime \\\n",
+ "0 openfoodfacts-contributors 1474103866 2016-09-17T09:17:46Z \n",
+ "1 usda-ndb-import 1489069957 2017-03-09T14:32:37Z \n",
+ "2 usda-ndb-import 1489069957 2017-03-09T14:32:37Z \n",
+ "3 usda-ndb-import 1489055731 2017-03-09T10:35:31Z \n",
+ "4 usda-ndb-import 1489055653 2017-03-09T10:34:13Z \n",
+ "\n",
+ " last_modified_t last_modified_datetime product_name \\\n",
+ "0 1474103893 2016-09-17T09:18:13Z Farine de blé noir \n",
+ "1 1489069957 2017-03-09T14:32:37Z Banana Chips Sweetened (Whole) \n",
+ "2 1489069957 2017-03-09T14:32:37Z Peanuts \n",
+ "3 1489055731 2017-03-09T10:35:31Z Organic Salted Nut Mix \n",
+ "4 1489055653 2017-03-09T10:34:13Z Organic Polenta \n",
+ "\n",
+ " generic_name quantity ... fruits-vegetables-nuts_100g \\\n",
+ "0 NaN 1kg ... NaN \n",
+ "1 NaN NaN ... NaN \n",
+ "2 NaN NaN ... NaN \n",
+ "3 NaN NaN ... NaN \n",
+ "4 NaN NaN ... NaN \n",
+ "\n",
+ " fruits-vegetables-nuts-estimate_100g collagen-meat-protein-ratio_100g \\\n",
+ "0 NaN NaN \n",
+ "1 NaN NaN \n",
+ "2 NaN NaN \n",
+ "3 NaN NaN \n",
+ "4 NaN NaN \n",
+ "\n",
+ " cocoa_100g chlorophyl_100g carbon-footprint_100g nutrition-score-fr_100g \\\n",
+ "0 NaN NaN NaN NaN \n",
+ "1 NaN NaN NaN 14.0 \n",
+ "2 NaN NaN NaN 0.0 \n",
+ "3 NaN NaN NaN 12.0 \n",
+ "4 NaN NaN NaN NaN \n",
+ "\n",
+ " nutrition-score-uk_100g glycemic-index_100g water-hardness_100g \n",
+ "0 NaN NaN NaN \n",
+ "1 14.0 NaN NaN \n",
+ "2 0.0 NaN NaN \n",
+ "3 12.0 NaN NaN \n",
+ "4 NaN NaN NaN \n",
+ "\n",
+ "[5 rows x 163 columns]"
+ ]
+ },
+ "execution_count": 11,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.head()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Step 5. What is the number of observations in the dataset?"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "356027"
+ ]
+ },
+ "execution_count": 12,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.shape[0]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Step 6. What is the number of columns in the dataset?"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "163"
+ ]
+ },
+ "execution_count": 13,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.shape[1]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Step 7. Print the names of all the columns."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Index(['code', 'url', 'creator', 'created_t', 'created_datetime',\n",
+ " 'last_modified_t', 'last_modified_datetime', 'product_name',\n",
+ " 'generic_name', 'quantity',\n",
+ " ...\n",
+ " 'fruits-vegetables-nuts_100g', 'fruits-vegetables-nuts-estimate_100g',\n",
+ " 'collagen-meat-protein-ratio_100g', 'cocoa_100g', 'chlorophyl_100g',\n",
+ " 'carbon-footprint_100g', 'nutrition-score-fr_100g',\n",
+ " 'nutrition-score-uk_100g', 'glycemic-index_100g',\n",
+ " 'water-hardness_100g'],\n",
+ " dtype='object', length=163)"
+ ]
+ },
+ "execution_count": 14,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.columns"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Step 8. What is the name of the 105th column?"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "'-glucose_100g'"
+ ]
+ },
+ "execution_count": 15,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.columns[104]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Step 9. What is the type of the observations of the 105th column?"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "dtype('float64')"
+ ]
+ },
+ "execution_count": 16,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df[df.columns[104]].dtype"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Step 10. How is the dataset indexed?"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 17,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "RangeIndex(start=0, stop=356027, step=1)"
+ ]
+ },
+ "execution_count": 17,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.index"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Step 11. What is the product name of the 19th observation?"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "'Lotus Organic Brown Jasmine Rice'"
+ ]
+ },
+ "execution_count": 18,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.loc[18, \"product_name\"]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "anaconda-cloud": {},
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.10.0"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 1
+}
diff --git a/02_Filtering_&_Sorting/Chipotle/Exercises_solved.ipynb b/02_Filtering_&_Sorting/Chipotle/Exercises_solved.ipynb
new file mode 100644
index 000000000..05646c6ed
--- /dev/null
+++ b/02_Filtering_&_Sorting/Chipotle/Exercises_solved.ipynb
@@ -0,0 +1,756 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Ex1 - Filtering and Sorting Data"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "This time we are going to pull data directly from the internet.\n",
+ "Special thanks to: https://github.com/justmarkham for sharing the dataset and materials.\n",
+ "\n",
+ "### Step 1. Import the necessary libraries"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import pandas as pd"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Step 2. Import the dataset from this [address](https://raw.githubusercontent.com/justmarkham/DAT8/master/data/chipotle.tsv). "
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Step 3. Assign it to a variable called chipo."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 69,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " order_id | \n",
+ " quantity | \n",
+ " item_name | \n",
+ " choice_description | \n",
+ " item_price | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " Chips and Fresh Tomato Salsa | \n",
+ " NaN | \n",
+ " $2.39 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " Izze | \n",
+ " [Clementine] | \n",
+ " $3.39 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " Nantucket Nectar | \n",
+ " [Apple] | \n",
+ " $3.39 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " Chips and Tomatillo-Green Chili Salsa | \n",
+ " NaN | \n",
+ " $2.39 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 2 | \n",
+ " 2 | \n",
+ " Chicken Bowl | \n",
+ " [Tomatillo-Red Chili Salsa (Hot), [Black Beans... | \n",
+ " $16.98 | \n",
+ "
\n",
+ " \n",
+ " | ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " | 4617 | \n",
+ " 1833 | \n",
+ " 1 | \n",
+ " Steak Burrito | \n",
+ " [Fresh Tomato Salsa, [Rice, Black Beans, Sour ... | \n",
+ " $11.75 | \n",
+ "
\n",
+ " \n",
+ " | 4618 | \n",
+ " 1833 | \n",
+ " 1 | \n",
+ " Steak Burrito | \n",
+ " [Fresh Tomato Salsa, [Rice, Sour Cream, Cheese... | \n",
+ " $11.75 | \n",
+ "
\n",
+ " \n",
+ " | 4619 | \n",
+ " 1834 | \n",
+ " 1 | \n",
+ " Chicken Salad Bowl | \n",
+ " [Fresh Tomato Salsa, [Fajita Vegetables, Pinto... | \n",
+ " $11.25 | \n",
+ "
\n",
+ " \n",
+ " | 4620 | \n",
+ " 1834 | \n",
+ " 1 | \n",
+ " Chicken Salad Bowl | \n",
+ " [Fresh Tomato Salsa, [Fajita Vegetables, Lettu... | \n",
+ " $8.75 | \n",
+ "
\n",
+ " \n",
+ " | 4621 | \n",
+ " 1834 | \n",
+ " 1 | \n",
+ " Chicken Salad Bowl | \n",
+ " [Fresh Tomato Salsa, [Fajita Vegetables, Pinto... | \n",
+ " $8.75 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
4622 rows × 5 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " order_id quantity item_name \\\n",
+ "0 1 1 Chips and Fresh Tomato Salsa \n",
+ "1 1 1 Izze \n",
+ "2 1 1 Nantucket Nectar \n",
+ "3 1 1 Chips and Tomatillo-Green Chili Salsa \n",
+ "4 2 2 Chicken Bowl \n",
+ "... ... ... ... \n",
+ "4617 1833 1 Steak Burrito \n",
+ "4618 1833 1 Steak Burrito \n",
+ "4619 1834 1 Chicken Salad Bowl \n",
+ "4620 1834 1 Chicken Salad Bowl \n",
+ "4621 1834 1 Chicken Salad Bowl \n",
+ "\n",
+ " choice_description item_price \n",
+ "0 NaN $2.39 \n",
+ "1 [Clementine] $3.39 \n",
+ "2 [Apple] $3.39 \n",
+ "3 NaN $2.39 \n",
+ "4 [Tomatillo-Red Chili Salsa (Hot), [Black Beans... $16.98 \n",
+ "... ... ... \n",
+ "4617 [Fresh Tomato Salsa, [Rice, Black Beans, Sour ... $11.75 \n",
+ "4618 [Fresh Tomato Salsa, [Rice, Sour Cream, Cheese... $11.75 \n",
+ "4619 [Fresh Tomato Salsa, [Fajita Vegetables, Pinto... $11.25 \n",
+ "4620 [Fresh Tomato Salsa, [Fajita Vegetables, Lettu... $8.75 \n",
+ "4621 [Fresh Tomato Salsa, [Fajita Vegetables, Pinto... $8.75 \n",
+ "\n",
+ "[4622 rows x 5 columns]"
+ ]
+ },
+ "execution_count": 69,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "path = \"https://raw.githubusercontent.com/justmarkham/DAT8/master/data/chipotle.tsv\"\n",
+ "chipo = pd.read_csv(path, sep=\"\\t\")\n",
+ "chipo"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Step 4. How many products cost more than $10.00?"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 70,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "dtype('O')"
+ ]
+ },
+ "execution_count": 70,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "chipo.item_price.dtype"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 71,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# item_price != unit_price\n",
+ "def clean_price_col(chipo, col):\n",
+ " # remove $ symbol and convert to float\n",
+ " chipo[col] = chipo[col].apply(lambda x: x[1:].rstrip())\n",
+ " chipo[col] = chipo[col].astype(\"float\") \n",
+ " return chipo\n",
+ "\n",
+ "def remove_duplicate(chipo, cols):\n",
+ " return chipo.drop_duplicates(subset=cols)\n",
+ "\n",
+ "chipo = clean_price_col(chipo, \"item_price\")\n",
+ "new_chipo = remove_duplicate(chipo, [\"item_name\", \"quantity\"])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 73,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "12"
+ ]
+ },
+ "execution_count": 73,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "len( new_chipo[ (new_chipo.item_price > 10) & (new_chipo.quantity==1)] )"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Step 5. What is the price of each item? \n",
+ "###### Print a data frame with only two columns: item_name and item_price"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 76,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " item_name | \n",
+ " item_price | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 606 | \n",
+ " Steak Salad Bowl | \n",
+ " 11.89 | \n",
+ "
\n",
+ " \n",
+ " | 1229 | \n",
+ " Barbacoa Salad Bowl | \n",
+ " 11.89 | \n",
+ "
\n",
+ " \n",
+ " | 1132 | \n",
+ " Carnitas Salad Bowl | \n",
+ " 11.89 | \n",
+ "
\n",
+ " \n",
+ " | 93 | \n",
+ " Carnitas Burrito | \n",
+ " 11.75 | \n",
+ "
\n",
+ " \n",
+ " | 1008 | \n",
+ " Carnitas Crispy Tacos | \n",
+ " 11.75 | \n",
+ "
\n",
+ " \n",
+ " | ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " | 6 | \n",
+ " Side of Chips | \n",
+ " 1.69 | \n",
+ "
\n",
+ " \n",
+ " | 329 | \n",
+ " Bottled Water | \n",
+ " 1.50 | \n",
+ "
\n",
+ " \n",
+ " | 263 | \n",
+ " Canned Soft Drink | \n",
+ " 1.25 | \n",
+ "
\n",
+ " \n",
+ " | 28 | \n",
+ " Canned Soda | \n",
+ " 1.09 | \n",
+ "
\n",
+ " \n",
+ " | 34 | \n",
+ " Bottled Water | \n",
+ " 1.09 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
135 rows × 2 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " item_name item_price\n",
+ "606 Steak Salad Bowl 11.89\n",
+ "1229 Barbacoa Salad Bowl 11.89\n",
+ "1132 Carnitas Salad Bowl 11.89\n",
+ "93 Carnitas Burrito 11.75\n",
+ "1008 Carnitas Crispy Tacos 11.75\n",
+ "... ... ...\n",
+ "6 Side of Chips 1.69\n",
+ "329 Bottled Water 1.50\n",
+ "263 Canned Soft Drink 1.25\n",
+ "28 Canned Soda 1.09\n",
+ "34 Bottled Water 1.09\n",
+ "\n",
+ "[135 rows x 2 columns]"
+ ]
+ },
+ "execution_count": 76,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "new_chipo_2 = chipo.drop_duplicates(subset=[\"item_name\", \"item_price\"])\n",
+ "new_chipo_2 = new_chipo_2[new_chipo_2.quantity==1]\n",
+ "\n",
+ "new_chipo_2.loc[:, [\"item_name\", \"item_price\"]].sort_values(by=\"item_price\", ascending=False)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Step 6. Sort by the name of the item"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 49,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " order_id | \n",
+ " quantity | \n",
+ " item_name | \n",
+ " choice_description | \n",
+ " item_price | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 3389 | \n",
+ " 1360 | \n",
+ " 2 | \n",
+ " 6 Pack Soft Drink | \n",
+ " [Diet Coke] | \n",
+ " 12.98 | \n",
+ "
\n",
+ " \n",
+ " | 341 | \n",
+ " 148 | \n",
+ " 1 | \n",
+ " 6 Pack Soft Drink | \n",
+ " [Diet Coke] | \n",
+ " 6.49 | \n",
+ "
\n",
+ " \n",
+ " | 1849 | \n",
+ " 749 | \n",
+ " 1 | \n",
+ " 6 Pack Soft Drink | \n",
+ " [Coke] | \n",
+ " 6.49 | \n",
+ "
\n",
+ " \n",
+ " | 1860 | \n",
+ " 754 | \n",
+ " 1 | \n",
+ " 6 Pack Soft Drink | \n",
+ " [Diet Coke] | \n",
+ " 6.49 | \n",
+ "
\n",
+ " \n",
+ " | 2713 | \n",
+ " 1076 | \n",
+ " 1 | \n",
+ " 6 Pack Soft Drink | \n",
+ " [Coke] | \n",
+ " 6.49 | \n",
+ "
\n",
+ " \n",
+ " | ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " | 2384 | \n",
+ " 948 | \n",
+ " 1 | \n",
+ " Veggie Soft Tacos | \n",
+ " [Roasted Chili Corn Salsa, [Fajita Vegetables,... | \n",
+ " 8.75 | \n",
+ "
\n",
+ " \n",
+ " | 781 | \n",
+ " 322 | \n",
+ " 1 | \n",
+ " Veggie Soft Tacos | \n",
+ " [Fresh Tomato Salsa, [Black Beans, Cheese, Sou... | \n",
+ " 8.75 | \n",
+ "
\n",
+ " \n",
+ " | 2851 | \n",
+ " 1132 | \n",
+ " 1 | \n",
+ " Veggie Soft Tacos | \n",
+ " [Roasted Chili Corn Salsa (Medium), [Black Bea... | \n",
+ " 8.49 | \n",
+ "
\n",
+ " \n",
+ " | 1699 | \n",
+ " 688 | \n",
+ " 1 | \n",
+ " Veggie Soft Tacos | \n",
+ " [Fresh Tomato Salsa, [Fajita Vegetables, Rice,... | \n",
+ " 11.25 | \n",
+ "
\n",
+ " \n",
+ " | 1395 | \n",
+ " 567 | \n",
+ " 1 | \n",
+ " Veggie Soft Tacos | \n",
+ " [Fresh Tomato Salsa (Mild), [Pinto Beans, Rice... | \n",
+ " 8.49 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
4622 rows × 5 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " order_id quantity item_name \\\n",
+ "3389 1360 2 6 Pack Soft Drink \n",
+ "341 148 1 6 Pack Soft Drink \n",
+ "1849 749 1 6 Pack Soft Drink \n",
+ "1860 754 1 6 Pack Soft Drink \n",
+ "2713 1076 1 6 Pack Soft Drink \n",
+ "... ... ... ... \n",
+ "2384 948 1 Veggie Soft Tacos \n",
+ "781 322 1 Veggie Soft Tacos \n",
+ "2851 1132 1 Veggie Soft Tacos \n",
+ "1699 688 1 Veggie Soft Tacos \n",
+ "1395 567 1 Veggie Soft Tacos \n",
+ "\n",
+ " choice_description item_price \n",
+ "3389 [Diet Coke] 12.98 \n",
+ "341 [Diet Coke] 6.49 \n",
+ "1849 [Coke] 6.49 \n",
+ "1860 [Diet Coke] 6.49 \n",
+ "2713 [Coke] 6.49 \n",
+ "... ... ... \n",
+ "2384 [Roasted Chili Corn Salsa, [Fajita Vegetables,... 8.75 \n",
+ "781 [Fresh Tomato Salsa, [Black Beans, Cheese, Sou... 8.75 \n",
+ "2851 [Roasted Chili Corn Salsa (Medium), [Black Bea... 8.49 \n",
+ "1699 [Fresh Tomato Salsa, [Fajita Vegetables, Rice,... 11.25 \n",
+ "1395 [Fresh Tomato Salsa (Mild), [Pinto Beans, Rice... 8.49 \n",
+ "\n",
+ "[4622 rows x 5 columns]"
+ ]
+ },
+ "execution_count": 49,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "chipo.sort_values(\"item_name\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Step 7. What was the quantity of the most expensive item ordered?"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 53,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "count 4622.000000\n",
+ "mean 7.464336\n",
+ "std 4.245557\n",
+ "min 1.090000\n",
+ "25% 3.390000\n",
+ "50% 8.750000\n",
+ "75% 9.250000\n",
+ "max 44.250000\n",
+ "Name: item_price, dtype: float64"
+ ]
+ },
+ "execution_count": 53,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "chipo[\"item_price\"].describe()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 57,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " order_id | \n",
+ " quantity | \n",
+ " item_name | \n",
+ " choice_description | \n",
+ " item_price | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 3598 | \n",
+ " 1443 | \n",
+ " 15 | \n",
+ " Chips and Fresh Tomato Salsa | \n",
+ " NaN | \n",
+ " 44.25 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " order_id quantity item_name choice_description \\\n",
+ "3598 1443 15 Chips and Fresh Tomato Salsa NaN \n",
+ "\n",
+ " item_price \n",
+ "3598 44.25 "
+ ]
+ },
+ "execution_count": 57,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "max_price = chipo.item_price.max()\n",
+ "\n",
+ "max_price_item = chipo[chipo.item_price == max_price]\n",
+ "max_price_item"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Step 8. How many times was a Veggie Salad Bowl ordered?"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 58,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "18"
+ ]
+ },
+ "execution_count": 58,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "len(chipo[chipo.item_name == \"Veggie Salad Bowl\"])"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Step 9. How many times did someone order more than one Canned Soda?"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 78,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "20"
+ ]
+ },
+ "execution_count": 78,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "len( chipo[(chipo.item_name == \"Canned Soda\") & (chipo.quantity>1)] )"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.10.0"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 1
+}