diff --git a/01_Getting_&_Knowing_Your_Data/Chipotle/Exercises.ipynb b/01_Getting_&_Knowing_Your_Data/Chipotle/Exercises.ipynb index 03e1fc603..d9e2280ed 100644 --- a/01_Getting_&_Knowing_Your_Data/Chipotle/Exercises.ipynb +++ b/01_Getting_&_Knowing_Your_Data/Chipotle/Exercises.ipynb @@ -19,12 +19,14 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": { "collapsed": false }, "outputs": [], - "source": [] + "source": [ + "import pandas as pd" + ] }, { "cell_type": "markdown", @@ -34,10 +36,22 @@ ] }, { - "cell_type": "markdown", + "cell_type": "code", + "execution_count": 5, "metadata": {}, + "outputs": [], + "source": [ + "df = pd.read_csv('https://raw.githubusercontent.com/justmarkham/DAT8/master/data/chipotle.tsv',delimiter='\\t')" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], "source": [ - "### Step 3. Assign it to a variable called chipo." + "### Step 3. Assign it to a variable called chipo.\n", + "chipo = df" ] }, { @@ -58,13 +72,25 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "metadata": { "collapsed": false, "scrolled": false }, - "outputs": [], - "source": [] + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": " order_id quantity item_name \\\n0 1 1 Chips and Fresh Tomato Salsa \n1 1 1 Izze \n2 1 1 Nantucket Nectar \n3 1 1 Chips and Tomatillo-Green Chili Salsa \n4 2 2 Chicken Bowl \n5 3 1 Chicken Bowl \n6 3 1 Side of Chips \n7 4 1 Steak Burrito \n8 4 1 Steak Soft Tacos \n9 5 1 Steak Burrito \n\n choice_description item_price \n0 NaN $2.39 \n1 [Clementine] $3.39 \n2 [Apple] $3.39 \n3 NaN $2.39 \n4 [Tomatillo-Red Chili Salsa (Hot), [Black Beans... $16.98 \n5 [Fresh Tomato Salsa (Mild), [Rice, Cheese, Sou... $10.98 \n6 NaN $1.69 \n7 [Tomatillo Red Chili Salsa, [Fajita Vegetables... $11.75 \n8 [Tomatillo Green Chili Salsa, [Pinto Beans, Ch... $9.25 \n9 [Fresh Tomato Salsa, [Rice, Black Beans, Pinto... $9.25 ", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
order_idquantityitem_namechoice_descriptionitem_price
011Chips and Fresh Tomato SalsaNaN$2.39
111Izze[Clementine]$3.39
211Nantucket Nectar[Apple]$3.39
311Chips and Tomatillo-Green Chili SalsaNaN$2.39
422Chicken Bowl[Tomatillo-Red Chili Salsa (Hot), [Black Beans...$16.98
531Chicken Bowl[Fresh Tomato Salsa (Mild), [Rice, Cheese, Sou...$10.98
631Side of ChipsNaN$1.69
741Steak Burrito[Tomatillo Red Chili Salsa, [Fajita Vegetables...$11.75
841Steak Soft Tacos[Tomatillo Green Chili Salsa, [Pinto Beans, Ch...$9.25
951Steak Burrito[Fresh Tomato Salsa, [Rice, Black Beans, Pinto...$9.25
\n
" + }, + "metadata": {}, + "execution_count": 7 + } + ], + "source": [ + "chipo.head(10)" + ] }, { "cell_type": "markdown", @@ -75,26 +101,41 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 8, "metadata": { - "collapsed": false + "collapsed": false, + "tags": [] }, - "outputs": [], + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": "4622\n" + } + ], "source": [ "# Solution 1\n", - "\n" + "print(len(chipo))\n" ] }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 12, "metadata": { - "collapsed": false + "collapsed": false, + "tags": [] }, - "outputs": [], + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": "4622\n" + } + ], "source": [ "# Solution 2\n", - "\n" + "\n", + "print(chipo.shape[0])" ] }, { @@ -106,12 +147,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 13, "metadata": { - "collapsed": false + "collapsed": false, + "tags": [] }, - "outputs": [], - "source": [] + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": "5\n" + } + ], + "source": [ + "print(chipo.shape[1])" + ] }, { "cell_type": "markdown", @@ -122,12 +172,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 16, "metadata": { - "collapsed": false + "collapsed": false, + "tags": [] }, - "outputs": [], - "source": [] + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": "['order_id' 'quantity' 'item_name' 'choice_description' 'item_price']\n" + } + ], + "source": [ + "print(chipo.columns.values)" + ] }, { "cell_type": "markdown", @@ -138,12 +197,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 17, "metadata": { - "collapsed": false + "collapsed": false, + "tags": [] }, - "outputs": [], - "source": [] + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": "RangeIndex(start=0, stop=4622, step=1)\n" + } + ], + "source": [ + "print(chipo.index)" + ] }, { "cell_type": "markdown", @@ -154,12 +222,23 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 27, "metadata": { "collapsed": false }, - "outputs": [], - "source": [] + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": "'Chicken Bowl'" + }, + "metadata": {}, + "execution_count": 27 + } + ], + "source": [ + "chipo.item_name.value_counts().index[0]" + ] }, { "cell_type": "markdown", @@ -170,12 +249,23 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 28, "metadata": { "collapsed": false }, - "outputs": [], - "source": [] + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": "726" + }, + "metadata": {}, + "execution_count": 28 + } + ], + "source": [ + "chipo.item_name.value_counts()[0]" + ] }, { "cell_type": "markdown", @@ -186,12 +276,23 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 29, "metadata": { "collapsed": false }, - "outputs": [], - "source": [] + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": "'[Diet Coke]'" + }, + "metadata": {}, + "execution_count": 29 + } + ], + "source": [ + "chipo.choice_description.value_counts().index[0]" + ] }, { "cell_type": "markdown", @@ -202,12 +303,23 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 30, "metadata": { "collapsed": false }, - "outputs": [], - "source": [] + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": "4972" + }, + "metadata": {}, + "execution_count": 30 + } + ], + "source": [ + "chipo.quantity.sum()" + ] }, { "cell_type": "markdown", @@ -225,12 +337,23 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 32, "metadata": { "collapsed": false }, - "outputs": [], - "source": [] + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": "dtype('O')" + }, + "metadata": {}, + "execution_count": 32 + } + ], + "source": [ + "chipo.item_price.dtype" + ] }, { "cell_type": "markdown", @@ -241,12 +364,14 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 33, "metadata": { "collapsed": true }, "outputs": [], - "source": [] + "source": [ + "chipo.item_price = chipo.item_price.map(lambda x: float(x.replace('$','')))" + ] }, { "cell_type": "markdown", @@ -257,12 +382,41 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 34, "metadata": { "collapsed": false }, - "outputs": [], - "source": [] + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": "dtype('float64')" + }, + "metadata": {}, + "execution_count": 34 + } + ], + "source": [ + "chipo.item_price.dtype" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": "0 2.39\n1 3.39\n2 3.39\n3 2.39\n4 16.98\n ... \n4617 11.75\n4618 11.75\n4619 11.25\n4620 8.75\n4621 8.75\nName: item_price, Length: 4622, dtype: float64" + }, + "metadata": {}, + "execution_count": 35 + } + ], + "source": [ + "chipo.item_price" + ] }, { "cell_type": "markdown", @@ -273,12 +427,24 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 36, "metadata": { "collapsed": false }, - "outputs": [], - "source": [] + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": "39237.02" + }, + "metadata": {}, + "execution_count": 36 + } + ], + "source": [ + "revenue = chipo.item_price * chipo.quantity\n", + "revenue.sum()" + ] }, { "cell_type": "markdown", @@ -289,12 +455,23 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 37, "metadata": { "collapsed": false }, - "outputs": [], - "source": [] + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": "1834" + }, + "metadata": {}, + "execution_count": 37 + } + ], + "source": [ + "chipo.order_id.nunique()" + ] }, { "cell_type": "markdown", @@ -305,14 +482,24 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 40, "metadata": { "collapsed": false }, - "outputs": [], + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": "21.39423118865867" + }, + "metadata": {}, + "execution_count": 40 + } + ], "source": [ "# Solution 1\n", - "\n" + "chipo['revenue'] = chipo.item_price * chipo.quantity\n", + "chipo.groupby('order_id')['revenue'].sum().mean()\n" ] }, { @@ -336,10 +523,47 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 41, "metadata": { "collapsed": false }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": " order_id quantity item_name \\\n0 1 1 Chips and Fresh Tomato Salsa \n1 1 1 Izze \n2 1 1 Nantucket Nectar \n3 1 1 Chips and Tomatillo-Green Chili Salsa \n4 2 2 Chicken Bowl \n\n choice_description item_price revenue \n0 NaN 2.39 2.39 \n1 [Clementine] 3.39 3.39 \n2 [Apple] 3.39 3.39 \n3 NaN 2.39 2.39 \n4 [Tomatillo-Red Chili Salsa (Hot), [Black Beans... 16.98 33.96 ", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
order_idquantityitem_namechoice_descriptionitem_pricerevenue
011Chips and Fresh Tomato SalsaNaN2.392.39
111Izze[Clementine]3.393.39
211Nantucket Nectar[Apple]3.393.39
311Chips and Tomatillo-Green Chili SalsaNaN2.392.39
422Chicken Bowl[Tomatillo-Red Chili Salsa (Hot), [Black Beans...16.9833.96
\n
" + }, + "metadata": {}, + "execution_count": 41 + } + ], + "source": [ + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": "50" + }, + "metadata": {}, + "execution_count": 42 + } + ], + "source": [ + "chipo.item_name.nunique()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], "source": [] } @@ -347,9 +571,9 @@ "metadata": { "anaconda-cloud": {}, "kernelspec": { - "display_name": "Python [default]", + "display_name": "Python 3.7.7 64-bit", "language": "python", - "name": "python2" + "name": "python_defaultSpec_1599526098309" }, "language_info": { "codemirror_mode": { @@ -361,9 +585,9 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython2", - "version": "2.7.12" + "version": "3.7.7-final" } }, "nbformat": 4, "nbformat_minor": 0 -} +} \ No newline at end of file