diff --git a/pandas.ipynb b/pandas.ipynb
index f0a8a33..475ad0f 100644
--- a/pandas.ipynb
+++ b/pandas.ipynb
@@ -15,7 +15,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
@@ -32,7 +32,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
@@ -96,9 +96,20 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 3,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array([5, 7, 9])"
+ ]
+ },
+ "execution_count": 3,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"# Numpy array addition: \n",
"\n",
@@ -108,9 +119,20 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 4,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array([1, 2, 3, 4, 5, 6])"
+ ]
+ },
+ "execution_count": 4,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"# Numpy array concatenation: \n",
"\n",
@@ -153,9 +175,108 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 6,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " total_bill | \n",
+ " tip | \n",
+ " sex | \n",
+ " smoker | \n",
+ " day | \n",
+ " time | \n",
+ " size | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 16.99 | \n",
+ " 1.01 | \n",
+ " Female | \n",
+ " No | \n",
+ " Sun | \n",
+ " Dinner | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 10.34 | \n",
+ " 1.66 | \n",
+ " Male | \n",
+ " No | \n",
+ " Sun | \n",
+ " Dinner | \n",
+ " 3 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 21.01 | \n",
+ " 3.50 | \n",
+ " Male | \n",
+ " No | \n",
+ " Sun | \n",
+ " Dinner | \n",
+ " 3 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 23.68 | \n",
+ " 3.31 | \n",
+ " Male | \n",
+ " No | \n",
+ " Sun | \n",
+ " Dinner | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 24.59 | \n",
+ " 3.61 | \n",
+ " Female | \n",
+ " No | \n",
+ " Sun | \n",
+ " Dinner | \n",
+ " 4 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " total_bill tip sex smoker day time size\n",
+ "0 16.99 1.01 Female No Sun Dinner 2\n",
+ "1 10.34 1.66 Male No Sun Dinner 3\n",
+ "2 21.01 3.50 Male No Sun Dinner 3\n",
+ "3 23.68 3.31 Male No Sun Dinner 2\n",
+ "4 24.59 3.61 Female No Sun Dinner 4"
+ ]
+ },
+ "execution_count": 6,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"import pandas as pd\n",
"\n",
@@ -164,6 +285,28 @@
"tips.head(5)"
]
},
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "pandas.core.series.Series"
+ ]
+ },
+ "execution_count": 8,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "type(tips)\n",
+ "\n",
+ "type(tips.tip)"
+ ]
+ },
{
"cell_type": "markdown",
"metadata": {
@@ -203,7 +346,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 34,
"metadata": {},
"outputs": [],
"source": [
@@ -213,9 +356,20 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 35,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "1.0"
+ ]
+ },
+ "execution_count": 35,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"# Accessing a certain value via the index\n",
"\n",
@@ -224,9 +378,20 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 12,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array([ 1., 15., -5., nan, 4., 123., 0., 78., 0., 5., -4.])"
+ ]
+ },
+ "execution_count": 12,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"# Note that there are a bunch of attributes.\n",
"# .values returns a numpy ndarray of the values! \n",
@@ -236,21 +401,58 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 16,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "RangeIndex(start=0, stop=11, step=1)"
+ ]
+ },
+ "execution_count": 16,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"# Take a look at the index. What type is it? \n",
"# You convert itto a numpy ndarray by adding \".values\" again!\n",
"\n",
- "my_series.index"
+ "my_series.index\n",
+ "\n",
+ "#type(my_series.index)\n",
+ "\n",
+ "#np.arange([1,2,3])"
]
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 17,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "om 1.0\n",
+ "ir 15.0\n",
+ "os -5.0\n",
+ "pap NaN\n",
+ "pas 4.0\n",
+ "pil 123.0\n",
+ "io 0.0\n",
+ "po 78.0\n",
+ "ulos 0.0\n",
+ "is 5.0\n",
+ "best -4.0\n",
+ "dtype: float64"
+ ]
+ },
+ "execution_count": 17,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"# You can overwrite the index directly: \n",
"\n",
@@ -276,9 +478,20 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 18,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(1.0, 1.0)"
+ ]
+ },
+ "execution_count": 18,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"# Series that have string indices can also be accessed via a RangeIndex\n",
"# (which is similar to the index of a regular Python list)\n",
@@ -288,9 +501,20 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 21,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(1.0, -5.0)"
+ ]
+ },
+ "execution_count": 21,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"# Note that indices can get moved around, by sorting for example!\n",
"# iloc gives you the element you would get if the Series\n",
@@ -301,7 +525,9 @@
"\n",
"x = my_series.sort_values()\n",
"\n",
- "x[0], x.iloc[0]"
+ "x\n",
+ "\n",
+ "x[0], x.iloc[0] # iloc returns the first item in the column, not the value with index 0"
]
},
{
@@ -321,13 +547,25 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 23,
"metadata": {
"slideshow": {
"slide_type": "slide"
}
},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "0 True\n",
+ "1 False\n",
+ "2 True\n",
+ "3 True\n",
+ "dtype: bool\n"
+ ]
+ }
+ ],
"source": [
"Series1 = pd.Series([1,3,5,7])\n",
"Series2 = pd.Series([0,10,-1,6])\n",
@@ -336,6 +574,7 @@
"\n",
"Series4 = Series1 > Series2 \n",
"\n",
+ "print(Series4)\n",
"# Take a look at the different Series objects!"
]
},
@@ -364,17 +603,31 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 28,
"metadata": {
"slideshow": {
"slide_type": "slide"
}
},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "nan"
+ ]
+ },
+ "execution_count": 28,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"Series1 = pd.Series([1,10],index=[\"om\",\"iros\"])\n",
"Series2 = pd.Series([4,-1],index=[\"pap\",\"as\"])\n",
- "Series3 = Series1 + Series2"
+ "Series3 = Series1 + Series2\n",
+ "\n",
+ "#Series2['om']\n",
+ "Series3.iloc[0]/5"
]
},
{
@@ -388,6 +641,32 @@
"This aspect makes it very easy to work with series that we have sorted or manipulated otherwise; there is always the address to access a value. This helps prevent accidentally combining values we didn't mean to combine!"
]
},
+ {
+ "cell_type": "code",
+ "execution_count": 33,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "foo 1\n",
+ "fog 2\n",
+ "dtype: int64"
+ ]
+ },
+ "execution_count": 33,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Distinct names so the 11-element `my_series` used by later cells is not overwritten\n",
+ "series_from_list = pd.Series([1,2,3],index=['foo','bar','baz'])\n",
+ "series_from_dict = pd.Series({'foo':1,'fog':2})\n",
+ "\n",
+ "series_from_dict"
+ ]
+ },
{
"cell_type": "markdown",
"metadata": {
@@ -414,21 +693,58 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 37,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "om 1.0\n",
+ "pap NaN\n",
+ "dtype: float64"
+ ]
+ },
+ "execution_count": 37,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"# accesing by list of index labels\n",
"\n",
"my_series.index = [\"om\",\"ir\",\"os\",\"pap\",\"pas\",\"pil\",\"io\",\"po\",\"ulos\",\"is\",\"best\"]\n",
- "x = my_series[[\"om\",\"pap\"]]"
+ "x = my_series[[\"om\",\"pap\"]]\n",
+ "\n",
+ "x"
]
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 38,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "om False\n",
+ "ir False\n",
+ "os False\n",
+ "pap False\n",
+ "pas False\n",
+ "pil False\n",
+ "io True\n",
+ "po False\n",
+ "ulos True\n",
+ "is False\n",
+ "best False\n",
+ "dtype: bool"
+ ]
+ },
+ "execution_count": 38,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"# getting a boolean-valued series by checking a condition\n",
"\n",
@@ -438,14 +754,32 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 41,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "io 0.0\n",
+ "ulos 0.0\n",
+ "dtype: float64\n",
+ "io 0.0\n",
+ "ulos 0.0\n",
+ "dtype: float64\n"
+ ]
+ }
+ ],
"source": [
"# Notice the index of x is a SUBSET of the index of \"my_series\"\n",
"# This can be useful when needing to relate values back to the original \"my_series\"!\n",
"\n",
- "x = my_series[choose]"
+ "x = my_series[choose]\n",
+ "\n",
+ "y=my_series[my_series==0]\n",
+ "\n",
+ "print(y)\n",
+ "print(x)"
]
},
{
@@ -468,16 +802,36 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 46,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "om 1.0\n",
+ "pas 4.0\n",
+ "is 5.0\n",
+ "dtype: float64"
+ ]
+ },
+ "execution_count": 46,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"# Challenge: \n",
"\n",
"# Filter \"my_series\" to be all the elements that are NOT\n",
"# equal to 0, using the \"choose\" boolean mask below: \n",
"\n",
- "choose = my_series == 0.0\n"
+ "choose = my_series == 0.0\n",
+ "\n",
+ "x=my_series[~choose]\n",
+ "\n",
+ "x\n",
+ "\n",
+ "my_series[(my_series<10.)& (my_series>0.)]\n"
]
},
{
@@ -543,9 +897,24 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 47,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "0 foo\n",
+ "1 bar\n",
+ "3 baz\n",
+ "4 qux\n",
+ "dtype: object"
+ ]
+ },
+ "execution_count": 47,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"# Challenge: \n",
"# Get a list of names, without the Null values!\n",
@@ -554,7 +923,11 @@
"# 1. Create a boolean mask by using the .notna() method.\n",
"# 2. Use the mask to subset the Series.\n",
"\n",
- "names = pd.Series(['foo','bar',None,'baz','qux',None])\n"
+ "names = pd.Series(['foo','bar',None,'baz','qux',None])\n",
+ "\n",
+ "mask=names.notna()\n",
+ "\n",
+ "names[mask]\n"
]
},
{
@@ -583,9 +956,44 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 57,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "foo\n",
+ "bar\n",
+ "None\n",
+ "foo\n",
+ "None\n",
+ "bar\n",
+ "bar\n",
+ "foo\n",
+ "None\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "0 foo\n",
+ "1 bar\n",
+ "2 None\n",
+ "3 foo\n",
+ "4 None\n",
+ "5 bar\n",
+ "6 bar\n",
+ "7 foo\n",
+ "8 None\n",
+ "dtype: object"
+ ]
+ },
+ "execution_count": 57,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"# Challenge: \n",
"\n",
@@ -595,26 +1003,57 @@
"\n",
"\n",
"def lower(s):\n",
- " # Your code here\n",
- " # HINT: delete the \"pass\" when your done\n",
- " # HINT2: handle None values!\n",
- " pass\n",
+ "    \"\"\"Lowercase one name; anything without .lower() (None, NaN) becomes None.\"\"\"\n",
+ "    try:\n",
+ "        return s.lower()\n",
+ "    except AttributeError:\n",
+ "        return None\n",
+ "\n",
+ "\n",
+ "# Series-level variant under its own name, so it is no longer shadowed by a second `def lower`\n",
+ "def lower_series(series):\n",
+ "    \"\"\"Lowercase every string in a Series; missing values are skipped via na_action.\"\"\"\n",
+ "    return series.map(lambda x: x.lower(), na_action='ignore')\n",
+ "\n",
"\n",
+ "names = pd.Series(['Foo', 'BAR', None, 'foo', None, 'bar', 'bAR', 'foo', None])\n",
"\n",
- "names = pd.Series(['Foo', 'BAR', None, 'foo', None, 'bar', 'bAR', 'foo', None])"
+ "lower(names)\n",
+ "names.map(lower)\n",
+ "\n",
+ "for i in names:\n",
+ " print(lower(i))\n",
+ " \n",
+ "pd.Series([lower(n) for n in names])"
]
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 65,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array([0.33333333])"
+ ]
+ },
+ "execution_count": 65,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"# Challenge: \n",
"\n",
"# Using the series from above, now lowercased, count the occurences of each name\n",
"# Hint: It's simple, just use .value_counts()!\n",
- "\n"
+ "\n",
+ "lowered_names = names.map(lower)\n",
+ "\n",
+ "# Scalar share of missing names; dividing by names.shape (a tuple) gave a 1-element array instead\n",
+ "print(lowered_names.isna().mean())\n",
+ "lowered_names.value_counts(dropna=False)\n"
]
},
{
@@ -636,9 +1075,163 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 66,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " total_bill | \n",
+ " tip | \n",
+ " sex | \n",
+ " smoker | \n",
+ " day | \n",
+ " time | \n",
+ " size | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 16.99 | \n",
+ " 1.01 | \n",
+ " Female | \n",
+ " No | \n",
+ " Sun | \n",
+ " Dinner | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 10.34 | \n",
+ " 1.66 | \n",
+ " Male | \n",
+ " No | \n",
+ " Sun | \n",
+ " Dinner | \n",
+ " 3 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 21.01 | \n",
+ " 3.50 | \n",
+ " Male | \n",
+ " No | \n",
+ " Sun | \n",
+ " Dinner | \n",
+ " 3 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 23.68 | \n",
+ " 3.31 | \n",
+ " Male | \n",
+ " No | \n",
+ " Sun | \n",
+ " Dinner | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 24.59 | \n",
+ " 3.61 | \n",
+ " Female | \n",
+ " No | \n",
+ " Sun | \n",
+ " Dinner | \n",
+ " 4 | \n",
+ "
\n",
+ " \n",
+ " | 5 | \n",
+ " 25.29 | \n",
+ " 4.71 | \n",
+ " Male | \n",
+ " No | \n",
+ " Sun | \n",
+ " Dinner | \n",
+ " 4 | \n",
+ "
\n",
+ " \n",
+ " | 6 | \n",
+ " 8.77 | \n",
+ " 2.00 | \n",
+ " Male | \n",
+ " No | \n",
+ " Sun | \n",
+ " Dinner | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " | 7 | \n",
+ " 26.88 | \n",
+ " 3.12 | \n",
+ " Male | \n",
+ " No | \n",
+ " Sun | \n",
+ " Dinner | \n",
+ " 4 | \n",
+ "
\n",
+ " \n",
+ " | 8 | \n",
+ " 15.04 | \n",
+ " 1.96 | \n",
+ " Male | \n",
+ " No | \n",
+ " Sun | \n",
+ " Dinner | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " | 9 | \n",
+ " 14.78 | \n",
+ " 3.23 | \n",
+ " Male | \n",
+ " No | \n",
+ " Sun | \n",
+ " Dinner | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " total_bill tip sex smoker day time size\n",
+ "0 16.99 1.01 Female No Sun Dinner 2\n",
+ "1 10.34 1.66 Male No Sun Dinner 3\n",
+ "2 21.01 3.50 Male No Sun Dinner 3\n",
+ "3 23.68 3.31 Male No Sun Dinner 2\n",
+ "4 24.59 3.61 Female No Sun Dinner 4\n",
+ "5 25.29 4.71 Male No Sun Dinner 4\n",
+ "6 8.77 2.00 Male No Sun Dinner 2\n",
+ "7 26.88 3.12 Male No Sun Dinner 4\n",
+ "8 15.04 1.96 Male No Sun Dinner 2\n",
+ "9 14.78 3.23 Male No Sun Dinner 2"
+ ]
+ },
+ "execution_count": 66,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"tips = pd.read_csv(\"tips.csv\")\n",
"tips.head(10) # the first method of our dataframe object! "
@@ -646,15 +1239,58 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 67,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Index(['total_bill', 'tip', 'sex', 'smoker', 'day', 'time', 'size'], dtype='object')"
+ ]
+ },
+ "execution_count": 67,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"# the other important attribute: name of rows and columns\n",
"tips.index\n",
"tips.columns"
]
},
+ {
+ "cell_type": "code",
+ "execution_count": 69,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "0 Female\n",
+ "1 Male\n",
+ "2 Male\n",
+ "3 Male\n",
+ "4 Female\n",
+ " ... \n",
+ "239 Male\n",
+ "240 Female\n",
+ "241 Male\n",
+ "242 Male\n",
+ "243 Female\n",
+ "Name: sex, Length: 244, dtype: object"
+ ]
+ },
+ "execution_count": 69,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "tips.size\n",
+ "tips.sex"
+ ]
+ },
{
"cell_type": "markdown",
"metadata": {
@@ -679,6 +1315,33 @@
"```"
]
},
+ {
+ "cell_type": "code",
+ "execution_count": 81,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "foo 1\n",
+ "foo 2\n",
+ "dtype: int64"
+ ]
+ },
+ "execution_count": 81,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "tips[\"size\"].corr(tips.tip)\n",
+ "\n",
+ "# Not unique indexes\n",
+ "s=pd.Series([1,2],index=['foo','foo'])\n",
+ "\n",
+ "s['foo']"
+ ]
+ },
{
"cell_type": "markdown",
"metadata": {
@@ -733,9 +1396,60 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 70,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " sex | \n",
+ " smoker | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 1 | \n",
+ " Male | \n",
+ " No | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " Male | \n",
+ " No | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " sex smoker\n",
+ "1 Male No\n",
+ "3 Male No"
+ ]
+ },
+ "execution_count": 70,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"# Accessing rows AND columns!\n",
"# Example of 2-dimension loc\n",
@@ -745,27 +1459,309 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 82,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " sex | \n",
+ " smoker | \n",
+ " day | \n",
+ " time | \n",
+ " size | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " Female | \n",
+ " No | \n",
+ " Sun | \n",
+ " Dinner | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " Male | \n",
+ " No | \n",
+ " Sun | \n",
+ " Dinner | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " sex smoker day time size\n",
+ "0 Female No Sun Dinner 2\n",
+ "3 Male No Sun Dinner 2"
+ ]
+ },
+ "execution_count": 82,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"# Accessing rows AND columns!\n",
"# Example of 2-dimensional iloc\n",
"\n",
- "tips.iloc[[1,3], 2:]"
+ "tips.iloc[[1,3], 2:]\n",
+ "tips.iloc[[0,3], 2:]"
]
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 83,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " sex | \n",
+ " tip | \n",
+ " day | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 20 | \n",
+ " Male | \n",
+ " 4.08 | \n",
+ " Sat | \n",
+ "
\n",
+ " \n",
+ " | 21 | \n",
+ " Female | \n",
+ " 2.75 | \n",
+ " Sat | \n",
+ "
\n",
+ " \n",
+ " | 22 | \n",
+ " Female | \n",
+ " 2.23 | \n",
+ " Sat | \n",
+ "
\n",
+ " \n",
+ " | 23 | \n",
+ " Male | \n",
+ " 7.58 | \n",
+ " Sat | \n",
+ "
\n",
+ " \n",
+ " | 24 | \n",
+ " Male | \n",
+ " 3.18 | \n",
+ " Sat | \n",
+ "
\n",
+ " \n",
+ " | 25 | \n",
+ " Male | \n",
+ " 2.34 | \n",
+ " Sat | \n",
+ "
\n",
+ " \n",
+ " | 26 | \n",
+ " Male | \n",
+ " 2.00 | \n",
+ " Sat | \n",
+ "
\n",
+ " \n",
+ " | 27 | \n",
+ " Male | \n",
+ " 2.00 | \n",
+ " Sat | \n",
+ "
\n",
+ " \n",
+ " | 28 | \n",
+ " Male | \n",
+ " 4.30 | \n",
+ " Sat | \n",
+ "
\n",
+ " \n",
+ " | 29 | \n",
+ " Female | \n",
+ " 3.00 | \n",
+ " Sat | \n",
+ "
\n",
+ " \n",
+ " | 30 | \n",
+ " Male | \n",
+ " 1.45 | \n",
+ " Sat | \n",
+ "
\n",
+ " \n",
+ " | 31 | \n",
+ " Male | \n",
+ " 2.50 | \n",
+ " Sat | \n",
+ "
\n",
+ " \n",
+ " | 32 | \n",
+ " Female | \n",
+ " 3.00 | \n",
+ " Sat | \n",
+ "
\n",
+ " \n",
+ " | 33 | \n",
+ " Female | \n",
+ " 2.45 | \n",
+ " Sat | \n",
+ "
\n",
+ " \n",
+ " | 34 | \n",
+ " Male | \n",
+ " 3.27 | \n",
+ " Sat | \n",
+ "
\n",
+ " \n",
+ " | 35 | \n",
+ " Male | \n",
+ " 3.60 | \n",
+ " Sat | \n",
+ "
\n",
+ " \n",
+ " | 36 | \n",
+ " Male | \n",
+ " 2.00 | \n",
+ " Sat | \n",
+ "
\n",
+ " \n",
+ " | 37 | \n",
+ " Female | \n",
+ " 3.07 | \n",
+ " Sat | \n",
+ "
\n",
+ " \n",
+ " | 38 | \n",
+ " Male | \n",
+ " 2.31 | \n",
+ " Sat | \n",
+ "
\n",
+ " \n",
+ " | 39 | \n",
+ " Male | \n",
+ " 5.00 | \n",
+ " Sat | \n",
+ "
\n",
+ " \n",
+ " | 40 | \n",
+ " Male | \n",
+ " 2.24 | \n",
+ " Sat | \n",
+ "
\n",
+ " \n",
+ " | 41 | \n",
+ " Male | \n",
+ " 2.54 | \n",
+ " Sun | \n",
+ "
\n",
+ " \n",
+ " | 42 | \n",
+ " Male | \n",
+ " 3.06 | \n",
+ " Sun | \n",
+ "
\n",
+ " \n",
+ " | 43 | \n",
+ " Male | \n",
+ " 1.32 | \n",
+ " Sun | \n",
+ "
\n",
+ " \n",
+ " | 44 | \n",
+ " Male | \n",
+ " 5.60 | \n",
+ " Sun | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " sex tip day\n",
+ "20 Male 4.08 Sat\n",
+ "21 Female 2.75 Sat\n",
+ "22 Female 2.23 Sat\n",
+ "23 Male 7.58 Sat\n",
+ "24 Male 3.18 Sat\n",
+ "25 Male 2.34 Sat\n",
+ "26 Male 2.00 Sat\n",
+ "27 Male 2.00 Sat\n",
+ "28 Male 4.30 Sat\n",
+ "29 Female 3.00 Sat\n",
+ "30 Male 1.45 Sat\n",
+ "31 Male 2.50 Sat\n",
+ "32 Female 3.00 Sat\n",
+ "33 Female 2.45 Sat\n",
+ "34 Male 3.27 Sat\n",
+ "35 Male 3.60 Sat\n",
+ "36 Male 2.00 Sat\n",
+ "37 Female 3.07 Sat\n",
+ "38 Male 2.31 Sat\n",
+ "39 Male 5.00 Sat\n",
+ "40 Male 2.24 Sat\n",
+ "41 Male 2.54 Sun\n",
+ "42 Male 3.06 Sun\n",
+ "43 Male 1.32 Sun\n",
+ "44 Male 5.60 Sun"
+ ]
+ },
+ "execution_count": 83,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"# Challenge:\n",
"\n",
"# Using the tips dataframe, create a new one that contains the \n",
"# information contained in all rows between the 20th (inclusive) \n",
- "# and the 45th (exclusive) and only the columns: tip, sex, day"
+ "# and the 45th (exclusive) and only the columns: tip, sex, day\n",
+ "\n",
+ "# .loc slices by label and includes the end label, so 20:44 keeps rows labelled 20 through 44\n",
+ "# (with a default 0..n-1 index that matches the end-exclusive positional slice 20:45).\n",
+ "challenge_columns = ['sex', 'tip', 'day']\n",
+ "\n",
+ "tips.loc[20:44, challenge_columns]"
]
},
{
@@ -785,6 +1781,193 @@
"etc\n"
]
},
+ {
+ "cell_type": "code",
+ "execution_count": 94,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " total_bill | \n",
+ " tip | \n",
+ " sex | \n",
+ " smoker | \n",
+ " day | \n",
+ " time | \n",
+ " size | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 16.99 | \n",
+ " 1.01 | \n",
+ " Female | \n",
+ " No | \n",
+ " Sun | \n",
+ " Dinner | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 10.34 | \n",
+ " 1.66 | \n",
+ " Male | \n",
+ " No | \n",
+ " Sun | \n",
+ " Dinner | \n",
+ " 3 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 21.01 | \n",
+ " 3.50 | \n",
+ " Female | \n",
+ " No | \n",
+ " Sun | \n",
+ " Dinner | \n",
+ " 3 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 23.68 | \n",
+ " 3.31 | \n",
+ " foo | \n",
+ " No | \n",
+ " Sun | \n",
+ " Dinner | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 24.59 | \n",
+ " 3.61 | \n",
+ " Female | \n",
+ " No | \n",
+ " Sun | \n",
+ " Dinner | \n",
+ " 4 | \n",
+ "
\n",
+ " \n",
+ " | ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " | 239 | \n",
+ " 29.03 | \n",
+ " 5.92 | \n",
+ " foo | \n",
+ " No | \n",
+ " Sat | \n",
+ " Dinner | \n",
+ " 3 | \n",
+ "
\n",
+ " \n",
+ " | 240 | \n",
+ " 27.18 | \n",
+ " 2.00 | \n",
+ " Female | \n",
+ " Yes | \n",
+ " Sat | \n",
+ " Dinner | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " | 241 | \n",
+ " 22.67 | \n",
+ " 2.00 | \n",
+ " Male | \n",
+ " Yes | \n",
+ " Sat | \n",
+ " Dinner | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " | 242 | \n",
+ " 17.82 | \n",
+ " 1.75 | \n",
+ " Male | \n",
+ " No | \n",
+ " Sat | \n",
+ " Dinner | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " | 243 | \n",
+ " 18.78 | \n",
+ " 3.00 | \n",
+ " Female | \n",
+ " No | \n",
+ " Thur | \n",
+ " Dinner | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
244 rows × 7 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " total_bill tip sex smoker day time size\n",
+ "0 16.99 1.01 Female No Sun Dinner 2\n",
+ "1 10.34 1.66 Male No Sun Dinner 3\n",
+ "2 21.01 3.50 Female No Sun Dinner 3\n",
+ "3 23.68 3.31 foo No Sun Dinner 2\n",
+ "4 24.59 3.61 Female No Sun Dinner 4\n",
+ ".. ... ... ... ... ... ... ...\n",
+ "239 29.03 5.92 foo No Sat Dinner 3\n",
+ "240 27.18 2.00 Female Yes Sat Dinner 2\n",
+ "241 22.67 2.00 Male Yes Sat Dinner 2\n",
+ "242 17.82 1.75 Male No Sat Dinner 2\n",
+ "243 18.78 3.00 Female No Thur Dinner 2\n",
+ "\n",
+ "[244 rows x 7 columns]"
+ ]
+ },
+ "execution_count": 94,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Work on a copy so the assignment demos below do not alter `tips` for later cells\n",
+ "tips_demo = tips.copy()\n",
+ "\n",
+ "# Set a single value: row label 2, column \"sex\"\n",
+ "tips_demo.loc[2, \"sex\"] = \"Female\"\n",
+ "\n",
+ "# Set many values at once through a boolean row mask\n",
+ "is_male_big_tipper = (tips_demo.sex == 'Male') & (tips_demo.tip > 2.)\n",
+ "tips_demo.loc[is_male_big_tipper, 'sex'] = \"foo\"\n",
+ "\n",
+ "tips_demo"
+ ]
+ },
{
"cell_type": "markdown",
"metadata": {
@@ -834,17 +2017,669 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 96,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " total_bill | \n",
+ " tip | \n",
+ " sex | \n",
+ " smoker | \n",
+ " day | \n",
+ " time | \n",
+ " size | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 3 | \n",
+ " 23.68 | \n",
+ " 3.31 | \n",
+ " foo | \n",
+ " No | \n",
+ " Sun | \n",
+ " Dinner | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " | 183 | \n",
+ " 23.17 | \n",
+ " 6.50 | \n",
+ " foo | \n",
+ " Yes | \n",
+ " Sun | \n",
+ " Dinner | \n",
+ " 4 | \n",
+ "
\n",
+ " \n",
+ " | 107 | \n",
+ " 25.21 | \n",
+ " 4.29 | \n",
+ " foo | \n",
+ " Yes | \n",
+ " Sat | \n",
+ " Dinner | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " | 26 | \n",
+ " 13.37 | \n",
+ " 2.00 | \n",
+ " Male | \n",
+ " No | \n",
+ " Sat | \n",
+ " Dinner | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " | 225 | \n",
+ " 16.27 | \n",
+ " 2.50 | \n",
+ " Female | \n",
+ " Yes | \n",
+ " Fri | \n",
+ " Lunch | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " | 5 | \n",
+ " 25.29 | \n",
+ " 4.71 | \n",
+ " foo | \n",
+ " No | \n",
+ " Sun | \n",
+ " Dinner | \n",
+ " 4 | \n",
+ "
\n",
+ " \n",
+ " | 18 | \n",
+ " 16.97 | \n",
+ " 3.50 | \n",
+ " Female | \n",
+ " No | \n",
+ " Sun | \n",
+ " Dinner | \n",
+ " 3 | \n",
+ "
\n",
+ " \n",
+ " | 230 | \n",
+ " 24.01 | \n",
+ " 2.00 | \n",
+ " Male | \n",
+ " Yes | \n",
+ " Sat | \n",
+ " Dinner | \n",
+ " 4 | \n",
+ "
\n",
+ " \n",
+ " | 211 | \n",
+ " 25.89 | \n",
+ " 5.16 | \n",
+ " foo | \n",
+ " Yes | \n",
+ " Sat | \n",
+ " Dinner | \n",
+ " 4 | \n",
+ "
\n",
+ " \n",
+ " | 138 | \n",
+ " 16.00 | \n",
+ " 2.00 | \n",
+ " Male | \n",
+ " Yes | \n",
+ " Thur | \n",
+ " Lunch | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " | 57 | \n",
+ " 26.41 | \n",
+ " 1.50 | \n",
+ " Female | \n",
+ " No | \n",
+ " Sat | \n",
+ " Dinner | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " | 34 | \n",
+ " 17.78 | \n",
+ " 3.27 | \n",
+ " foo | \n",
+ " No | \n",
+ " Sat | \n",
+ " Dinner | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " | 179 | \n",
+ " 34.63 | \n",
+ " 3.55 | \n",
+ " foo | \n",
+ " Yes | \n",
+ " Sun | \n",
+ " Dinner | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " | 155 | \n",
+ " 29.85 | \n",
+ " 5.14 | \n",
+ " Female | \n",
+ " No | \n",
+ " Sun | \n",
+ " Dinner | \n",
+ " 5 | \n",
+ "
\n",
+ " \n",
+ " | 19 | \n",
+ " 20.65 | \n",
+ " 3.35 | \n",
+ " foo | \n",
+ " No | \n",
+ " Sat | \n",
+ " Dinner | \n",
+ " 3 | \n",
+ "
\n",
+ " \n",
+ " | 153 | \n",
+ " 24.55 | \n",
+ " 2.00 | \n",
+ " Male | \n",
+ " No | \n",
+ " Sun | \n",
+ " Dinner | \n",
+ " 4 | \n",
+ "
\n",
+ " \n",
+ " | 148 | \n",
+ " 9.78 | \n",
+ " 1.73 | \n",
+ " Male | \n",
+ " No | \n",
+ " Thur | \n",
+ " Lunch | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " | 100 | \n",
+ " 11.35 | \n",
+ " 2.50 | \n",
+ " Female | \n",
+ " Yes | \n",
+ " Fri | \n",
+ " Dinner | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " | 235 | \n",
+ " 10.07 | \n",
+ " 1.25 | \n",
+ " Male | \n",
+ " No | \n",
+ " Sat | \n",
+ " Dinner | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " | 0 | \n",
+ " 16.99 | \n",
+ " 1.01 | \n",
+ " Female | \n",
+ " No | \n",
+ " Sun | \n",
+ " Dinner | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " | 20 | \n",
+ " 17.92 | \n",
+ " 4.08 | \n",
+ " foo | \n",
+ " No | \n",
+ " Sat | \n",
+ " Dinner | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " | 216 | \n",
+ " 28.15 | \n",
+ " 3.00 | \n",
+ " foo | \n",
+ " Yes | \n",
+ " Sat | \n",
+ " Dinner | \n",
+ " 5 | \n",
+ "
\n",
+ " \n",
+ " | 137 | \n",
+ " 14.15 | \n",
+ " 2.00 | \n",
+ " Female | \n",
+ " No | \n",
+ " Thur | \n",
+ " Lunch | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " | 166 | \n",
+ " 20.76 | \n",
+ " 2.24 | \n",
+ " foo | \n",
+ " No | \n",
+ " Sun | \n",
+ " Dinner | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " | 122 | \n",
+ " 14.26 | \n",
+ " 2.50 | \n",
+ " foo | \n",
+ " No | \n",
+ " Thur | \n",
+ " Lunch | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " | 22 | \n",
+ " 15.77 | \n",
+ " 2.23 | \n",
+ " Female | \n",
+ " No | \n",
+ " Sat | \n",
+ " Dinner | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " | 222 | \n",
+ " 8.58 | \n",
+ " 1.92 | \n",
+ " Male | \n",
+ " Yes | \n",
+ " Fri | \n",
+ " Lunch | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " | 32 | \n",
+ " 15.06 | \n",
+ " 3.00 | \n",
+ " Female | \n",
+ " No | \n",
+ " Sat | \n",
+ " Dinner | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " | 37 | \n",
+ " 16.93 | \n",
+ " 3.07 | \n",
+ " Female | \n",
+ " No | \n",
+ " Sat | \n",
+ " Dinner | \n",
+ " 3 | \n",
+ "
\n",
+ " \n",
+ " | 215 | \n",
+ " 12.90 | \n",
+ " 1.10 | \n",
+ " Female | \n",
+ " Yes | \n",
+ " Sat | \n",
+ " Dinner | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " | 71 | \n",
+ " 17.07 | \n",
+ " 3.00 | \n",
+ " Female | \n",
+ " No | \n",
+ " Sat | \n",
+ " Dinner | \n",
+ " 3 | \n",
+ "
\n",
+ " \n",
+ " | 49 | \n",
+ " 18.04 | \n",
+ " 3.00 | \n",
+ " foo | \n",
+ " No | \n",
+ " Sun | \n",
+ " Dinner | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " | 131 | \n",
+ " 20.27 | \n",
+ " 2.83 | \n",
+ " Female | \n",
+ " No | \n",
+ " Thur | \n",
+ " Lunch | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 21.01 | \n",
+ " 3.50 | \n",
+ " Female | \n",
+ " No | \n",
+ " Sun | \n",
+ " Dinner | \n",
+ " 3 | \n",
+ "
\n",
+ " \n",
+ " | 43 | \n",
+ " 9.68 | \n",
+ " 1.32 | \n",
+ " Male | \n",
+ " No | \n",
+ " Sun | \n",
+ " Dinner | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " | 48 | \n",
+ " 28.55 | \n",
+ " 2.05 | \n",
+ " foo | \n",
+ " No | \n",
+ " Sun | \n",
+ " Dinner | \n",
+ " 3 | \n",
+ "
\n",
+ " \n",
+ " | 233 | \n",
+ " 10.77 | \n",
+ " 1.47 | \n",
+ " Male | \n",
+ " No | \n",
+ " Sat | \n",
+ " Dinner | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " | 135 | \n",
+ " 8.51 | \n",
+ " 1.25 | \n",
+ " Female | \n",
+ " No | \n",
+ " Thur | \n",
+ " Lunch | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " | 55 | \n",
+ " 19.49 | \n",
+ " 3.51 | \n",
+ " foo | \n",
+ " No | \n",
+ " Sun | \n",
+ " Dinner | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " | 195 | \n",
+ " 7.56 | \n",
+ " 1.44 | \n",
+ " Male | \n",
+ " No | \n",
+ " Thur | \n",
+ " Lunch | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " | 109 | \n",
+ " 14.31 | \n",
+ " 4.00 | \n",
+ " Female | \n",
+ " Yes | \n",
+ " Sat | \n",
+ " Dinner | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " | 239 | \n",
+ " 29.03 | \n",
+ " 5.92 | \n",
+ " foo | \n",
+ " No | \n",
+ " Sat | \n",
+ " Dinner | \n",
+ " 3 | \n",
+ "
\n",
+ " \n",
+ " | 116 | \n",
+ " 29.93 | \n",
+ " 5.07 | \n",
+ " foo | \n",
+ " No | \n",
+ " Sun | \n",
+ " Dinner | \n",
+ " 4 | \n",
+ "
\n",
+ " \n",
+ " | 29 | \n",
+ " 19.65 | \n",
+ " 3.00 | \n",
+ " Female | \n",
+ " No | \n",
+ " Sat | \n",
+ " Dinner | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " | 93 | \n",
+ " 16.32 | \n",
+ " 4.30 | \n",
+ " Female | \n",
+ " Yes | \n",
+ " Fri | \n",
+ " Dinner | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " | 113 | \n",
+ " 23.95 | \n",
+ " 2.55 | \n",
+ " foo | \n",
+ " No | \n",
+ " Sun | \n",
+ " Dinner | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " | 68 | \n",
+ " 20.23 | \n",
+ " 2.01 | \n",
+ " foo | \n",
+ " No | \n",
+ " Sat | \n",
+ " Dinner | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " | 42 | \n",
+ " 13.94 | \n",
+ " 3.06 | \n",
+ " foo | \n",
+ " No | \n",
+ " Sun | \n",
+ " Dinner | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " | 79 | \n",
+ " 17.29 | \n",
+ " 2.71 | \n",
+ " foo | \n",
+ " No | \n",
+ " Thur | \n",
+ " Lunch | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " total_bill tip sex smoker day time size\n",
+ "3 23.68 3.31 foo No Sun Dinner 2\n",
+ "183 23.17 6.50 foo Yes Sun Dinner 4\n",
+ "107 25.21 4.29 foo Yes Sat Dinner 2\n",
+ "26 13.37 2.00 Male No Sat Dinner 2\n",
+ "225 16.27 2.50 Female Yes Fri Lunch 2\n",
+ "5 25.29 4.71 foo No Sun Dinner 4\n",
+ "18 16.97 3.50 Female No Sun Dinner 3\n",
+ "230 24.01 2.00 Male Yes Sat Dinner 4\n",
+ "211 25.89 5.16 foo Yes Sat Dinner 4\n",
+ "138 16.00 2.00 Male Yes Thur Lunch 2\n",
+ "57 26.41 1.50 Female No Sat Dinner 2\n",
+ "34 17.78 3.27 foo No Sat Dinner 2\n",
+ "179 34.63 3.55 foo Yes Sun Dinner 2\n",
+ "155 29.85 5.14 Female No Sun Dinner 5\n",
+ "19 20.65 3.35 foo No Sat Dinner 3\n",
+ "153 24.55 2.00 Male No Sun Dinner 4\n",
+ "148 9.78 1.73 Male No Thur Lunch 2\n",
+ "100 11.35 2.50 Female Yes Fri Dinner 2\n",
+ "235 10.07 1.25 Male No Sat Dinner 2\n",
+ "0 16.99 1.01 Female No Sun Dinner 2\n",
+ "20 17.92 4.08 foo No Sat Dinner 2\n",
+ "216 28.15 3.00 foo Yes Sat Dinner 5\n",
+ "137 14.15 2.00 Female No Thur Lunch 2\n",
+ "166 20.76 2.24 foo No Sun Dinner 2\n",
+ "122 14.26 2.50 foo No Thur Lunch 2\n",
+ "22 15.77 2.23 Female No Sat Dinner 2\n",
+ "222 8.58 1.92 Male Yes Fri Lunch 1\n",
+ "32 15.06 3.00 Female No Sat Dinner 2\n",
+ "37 16.93 3.07 Female No Sat Dinner 3\n",
+ "215 12.90 1.10 Female Yes Sat Dinner 2\n",
+ "71 17.07 3.00 Female No Sat Dinner 3\n",
+ "49 18.04 3.00 foo No Sun Dinner 2\n",
+ "131 20.27 2.83 Female No Thur Lunch 2\n",
+ "2 21.01 3.50 Female No Sun Dinner 3\n",
+ "43 9.68 1.32 Male No Sun Dinner 2\n",
+ "48 28.55 2.05 foo No Sun Dinner 3\n",
+ "233 10.77 1.47 Male No Sat Dinner 2\n",
+ "135 8.51 1.25 Female No Thur Lunch 2\n",
+ "55 19.49 3.51 foo No Sun Dinner 2\n",
+ "195 7.56 1.44 Male No Thur Lunch 2\n",
+ "109 14.31 4.00 Female Yes Sat Dinner 2\n",
+ "239 29.03 5.92 foo No Sat Dinner 3\n",
+ "116 29.93 5.07 foo No Sun Dinner 4\n",
+ "29 19.65 3.00 Female No Sat Dinner 2\n",
+ "93 16.32 4.30 Female Yes Fri Dinner 2\n",
+ "113 23.95 2.55 foo No Sun Dinner 2\n",
+ "68 20.23 2.01 foo No Sat Dinner 2\n",
+ "42 13.94 3.06 foo No Sun Dinner 2\n",
+ "79 17.29 2.71 foo No Thur Lunch 2"
+ ]
+ },
+ "execution_count": 96,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "tips.sample(frac=0.2)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 107,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " tip | \n",
+ " size | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | tip | \n",
+ " 1.000000 | \n",
+ " 0.387542 | \n",
+ "
\n",
+ " \n",
+ " | size | \n",
+ " 0.387542 | \n",
+ " 1.000000 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " tip size\n",
+ "tip 1.000000 0.387542\n",
+ "size 0.387542 1.000000"
+ ]
+ },
+ "execution_count": 107,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"# Challenge:\n",
"\n",
"# Using the tips dataframe, calculate the correlation between\n",
"# tip and size for only Male clients during Dinner. \n",
"\n",
+ "#tips[(tips.time==\"Dinner\") &(tips.sex==\"Male\")][[\"tip\",\"size\"]].corr()\n",
+ "\n",
"# HINT: Remember that \"size\" cannot be accessed via dot notation, as it's an \n",
- "# attribute of the series!"
+ "# attribute of the DataFrame!\n",
+ "\n",
+ "#tips.head()\n",
+ "# Split the code across multiple lines and wrap it all in parentheses\n",
+ "(tips[(tips.time==\"Dinner\") &(tips.sex==\"Male\")]\n",
+ " [[\"tip\",\"size\"]]\n",
+ " .corr()\n",
+ ")"
]
},
{
@@ -864,21 +2699,149 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 111,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 111,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"# Group tips dataframe by size of table\n",
"by_size = tips.groupby(\"size\")\n",
"\n",
- "by_size"
+ "by_size\n"
]
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 98,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "[(1, total_bill tip sex smoker day time size\n",
+ " 67 3.07 1.00 Female Yes Sat Dinner 1\n",
+ " 82 10.07 1.83 Female No Thur Lunch 1\n",
+ " 111 7.25 1.00 Female No Sat Dinner 1\n",
+ " 222 8.58 1.92 Male Yes Fri Lunch 1),\n",
+ " (2, total_bill tip sex smoker day time size\n",
+ " 0 16.99 1.01 Female No Sun Dinner 2\n",
+ " 3 23.68 3.31 foo No Sun Dinner 2\n",
+ " 6 8.77 2.00 Male No Sun Dinner 2\n",
+ " 8 15.04 1.96 Male No Sun Dinner 2\n",
+ " 9 14.78 3.23 foo No Sun Dinner 2\n",
+ " .. ... ... ... ... ... ... ...\n",
+ " 237 32.83 1.17 Male Yes Sat Dinner 2\n",
+ " 240 27.18 2.00 Female Yes Sat Dinner 2\n",
+ " 241 22.67 2.00 Male Yes Sat Dinner 2\n",
+ " 242 17.82 1.75 Male No Sat Dinner 2\n",
+ " 243 18.78 3.00 Female No Thur Dinner 2\n",
+ " \n",
+ " [156 rows x 7 columns]),\n",
+ " (3, total_bill tip sex smoker day time size\n",
+ " 1 10.34 1.66 Male No Sun Dinner 3\n",
+ " 2 21.01 3.50 Female No Sun Dinner 3\n",
+ " 16 10.33 1.67 Female No Sun Dinner 3\n",
+ " 17 16.29 3.71 foo No Sun Dinner 3\n",
+ " 18 16.97 3.50 Female No Sun Dinner 3\n",
+ " 19 20.65 3.35 foo No Sat Dinner 3\n",
+ " 35 24.06 3.60 foo No Sat Dinner 3\n",
+ " 36 16.31 2.00 Male No Sat Dinner 3\n",
+ " 37 16.93 3.07 Female No Sat Dinner 3\n",
+ " 38 18.69 2.31 foo No Sat Dinner 3\n",
+ " 39 31.27 5.00 foo No Sat Dinner 3\n",
+ " 40 16.04 2.24 foo No Sat Dinner 3\n",
+ " 48 28.55 2.05 foo No Sun Dinner 3\n",
+ " 64 17.59 2.64 foo No Sat Dinner 3\n",
+ " 65 20.08 3.15 foo No Sat Dinner 3\n",
+ " 71 17.07 3.00 Female No Sat Dinner 3\n",
+ " 102 44.30 2.50 Female Yes Sat Dinner 3\n",
+ " 112 38.07 4.00 foo No Sun Dinner 3\n",
+ " 114 25.71 4.00 Female No Sun Dinner 3\n",
+ " 129 22.82 2.18 foo No Thur Lunch 3\n",
+ " 146 18.64 1.36 Female No Thur Lunch 3\n",
+ " 152 17.26 2.74 foo No Sun Dinner 3\n",
+ " 162 16.21 2.00 Female No Sun Dinner 3\n",
+ " 165 24.52 3.48 foo No Sun Dinner 3\n",
+ " 170 50.81 10.00 foo Yes Sat Dinner 3\n",
+ " 182 45.35 3.50 foo Yes Sun Dinner 3\n",
+ " 186 20.90 3.50 Female Yes Sun Dinner 3\n",
+ " 188 18.15 3.50 Female Yes Sun Dinner 3\n",
+ " 189 23.10 4.00 foo Yes Sun Dinner 3\n",
+ " 200 18.71 4.00 foo Yes Thur Lunch 3\n",
+ " 205 16.47 3.23 Female Yes Thur Lunch 3\n",
+ " 206 26.59 3.41 foo Yes Sat Dinner 3\n",
+ " 210 30.06 2.00 Male Yes Sat Dinner 3\n",
+ " 214 28.17 6.50 Female Yes Sat Dinner 3\n",
+ " 223 15.98 3.00 Female No Fri Lunch 3\n",
+ " 231 15.69 3.00 foo Yes Sat Dinner 3\n",
+ " 238 35.83 4.67 Female No Sat Dinner 3\n",
+ " 239 29.03 5.92 foo No Sat Dinner 3),\n",
+ " (4, total_bill tip sex smoker day time size\n",
+ " 4 24.59 3.61 Female No Sun Dinner 4\n",
+ " 5 25.29 4.71 foo No Sun Dinner 4\n",
+ " 7 26.88 3.12 foo No Sun Dinner 4\n",
+ " 11 35.26 5.00 Female No Sun Dinner 4\n",
+ " 13 18.43 3.00 foo No Sun Dinner 4\n",
+ " 23 39.42 7.58 foo No Sat Dinner 4\n",
+ " 25 17.81 2.34 foo No Sat Dinner 4\n",
+ " 31 18.35 2.50 foo No Sat Dinner 4\n",
+ " 33 20.69 2.45 Female No Sat Dinner 4\n",
+ " 44 30.40 5.60 foo No Sun Dinner 4\n",
+ " 47 32.40 6.00 foo No Sun Dinner 4\n",
+ " 52 34.81 5.20 Female No Sun Dinner 4\n",
+ " 54 25.56 4.34 foo No Sun Dinner 4\n",
+ " 56 38.01 3.00 foo Yes Sat Dinner 4\n",
+ " 59 48.27 6.73 foo No Sat Dinner 4\n",
+ " 63 18.29 3.76 foo Yes Sat Dinner 4\n",
+ " 77 27.20 4.00 foo No Thur Lunch 4\n",
+ " 85 34.83 5.17 Female No Thur Lunch 4\n",
+ " 95 40.17 4.73 foo Yes Fri Dinner 4\n",
+ " 116 29.93 5.07 foo No Sun Dinner 4\n",
+ " 119 24.08 2.92 Female No Thur Lunch 4\n",
+ " 153 24.55 2.00 Male No Sun Dinner 4\n",
+ " 154 19.77 2.00 Male No Sun Dinner 4\n",
+ " 157 25.00 3.75 Female No Sun Dinner 4\n",
+ " 159 16.49 2.00 Male No Sun Dinner 4\n",
+ " 160 21.50 3.50 foo No Sun Dinner 4\n",
+ " 167 31.71 4.50 foo No Sun Dinner 4\n",
+ " 180 34.65 3.68 foo Yes Sun Dinner 4\n",
+ " 183 23.17 6.50 foo Yes Sun Dinner 4\n",
+ " 197 43.11 5.00 Female Yes Thur Lunch 4\n",
+ " 204 20.53 4.00 foo Yes Thur Lunch 4\n",
+ " 207 38.73 3.00 foo Yes Sat Dinner 4\n",
+ " 211 25.89 5.16 foo Yes Sat Dinner 4\n",
+ " 212 48.33 9.00 foo No Sat Dinner 4\n",
+ " 219 30.14 3.09 Female Yes Sat Dinner 4\n",
+ " 227 20.45 3.00 foo No Sat Dinner 4\n",
+ " 230 24.01 2.00 Male Yes Sat Dinner 4),\n",
+ " (5, total_bill tip sex smoker day time size\n",
+ " 142 41.19 5.00 foo No Thur Lunch 5\n",
+ " 155 29.85 5.14 Female No Sun Dinner 5\n",
+ " 185 20.69 5.00 foo No Sun Dinner 5\n",
+ " 187 30.46 2.00 Male Yes Sun Dinner 5\n",
+ " 216 28.15 3.00 foo Yes Sat Dinner 5),\n",
+ " (6, total_bill tip sex smoker day time size\n",
+ " 125 29.80 4.2 Female No Thur Lunch 6\n",
+ " 141 34.30 6.7 foo No Thur Lunch 6\n",
+ " 143 27.05 5.0 Female No Thur Lunch 6\n",
+ " 156 48.17 5.0 foo No Sun Dinner 6)]"
+ ]
+ },
+ "execution_count": 98,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"# If we coerce it to a list, we see something interesting: \n",
"# It's basically a list of tuples! \n",
@@ -890,9 +2853,31 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 99,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Female\n",
+ "total_bill 18.090455\n",
+ "tip 2.841023\n",
+ "size 2.465909\n",
+ "dtype: float64\n",
+ "Male\n",
+ "total_bill 14.3908\n",
+ "tip 1.7512\n",
+ "size 2.2600\n",
+ "dtype: float64\n",
+ "foo\n",
+ "total_bill 23.738396\n",
+ "tip 3.717075\n",
+ "size 2.801887\n",
+ "dtype: float64\n"
+ ]
+ }
+ ],
"source": [
"# We can iterate through the groupby just like we would a list of tuples!\n",
"\n",
@@ -921,9 +2906,26 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
+ "execution_count": 100,
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "sex\n",
+ "Female 44.30\n",
+ "Male 32.83\n",
+ "foo 50.81\n",
+ "dtype: float64"
+ ]
+ },
+ "execution_count": 100,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"# Get the maximum bill by gender: \n",
"\n",
@@ -935,14 +2937,63 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 105,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "9.0"
+ ]
+ },
+ "execution_count": 105,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "tips.tip.sort_values(ascending=False).iloc[1]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 132,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "sex\n",
+ "Female 43.11\n",
+ "Male 30.46\n",
+ "foo 48.33\n",
+ "dtype: float64"
+ ]
+ },
+ "execution_count": 132,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"# Challenge: \n",
"\n",
+ "\n",
+ "from toolz import curry\n",
"# Get the second largest bill by gender!\n",
- "# HINT: use sort_values and iloc!"
+ "# HINT: use sort_values and iloc!\n",
+ "\n",
+ "def max_nbill(df,n):\n",
+ " return df.total_bill.sort_values(ascending=False).iloc[n-1]\n",
+ "\n",
+ "tips.groupby(\"sex\").apply(lambda df: max_nbill(df,n=2))\n",
+ "\n",
+ "#def partial (fn,*args):\n",
+ "# return lambda x: fn(x, *args)\n",
+ "\n",
+ "#tips.groupby('sex').apply(partial(max_nbill,n=3))\n",
+ "\n",
+ "from functools import partial\n"
]
},
{
@@ -961,9 +3012,180 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 135,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " total_bill | \n",
+ " tip | \n",
+ " sex | \n",
+ " smoker | \n",
+ " day | \n",
+ " time | \n",
+ " size | \n",
+ "
\n",
+ " \n",
+ " | sex | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | Female | \n",
+ " 44.30 | \n",
+ " 6.5 | \n",
+ " Female | \n",
+ " Yes | \n",
+ " Thur | \n",
+ " Lunch | \n",
+ " 6 | \n",
+ "
\n",
+ " \n",
+ " | Male | \n",
+ " 32.83 | \n",
+ " 2.0 | \n",
+ " Male | \n",
+ " Yes | \n",
+ " Thur | \n",
+ " Lunch | \n",
+ " 5 | \n",
+ "
\n",
+ " \n",
+ " | foo | \n",
+ " 50.81 | \n",
+ " 10.0 | \n",
+ " foo | \n",
+ " Yes | \n",
+ " Thur | \n",
+ " Lunch | \n",
+ " 6 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " total_bill tip sex smoker day time size\n",
+ "sex \n",
+ "Female 44.30 6.5 Female Yes Thur Lunch 6\n",
+ "Male 32.83 2.0 Male Yes Thur Lunch 5\n",
+ "foo 50.81 10.0 foo Yes Thur Lunch 6"
+ ]
+ },
+ "execution_count": 135,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "tips.groupby(\"sex\").max()\n",
+ "tips.groupby(\"sex\").apply(lambda c: c.max())"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 136,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | day | \n",
+ " Fri | \n",
+ " Sat | \n",
+ " Sun | \n",
+ " Thur | \n",
+ "
\n",
+ " \n",
+ " | sex | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | Female | \n",
+ " 2.781111 | \n",
+ " 2.801786 | \n",
+ " 3.374211 | \n",
+ " 2.575625 | \n",
+ "
\n",
+ " \n",
+ " | Male | \n",
+ " 1.625000 | \n",
+ " 1.681500 | \n",
+ " 1.830000 | \n",
+ " 1.815000 | \n",
+ "
\n",
+ " \n",
+ " | foo | \n",
+ " 3.405000 | \n",
+ " 3.803077 | \n",
+ " 3.756098 | \n",
+ " 3.563000 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ "day Fri Sat Sun Thur\n",
+ "sex \n",
+ "Female 2.781111 2.801786 3.374211 2.575625\n",
+ "Male 1.625000 1.681500 1.830000 1.815000\n",
+ "foo 3.405000 3.803077 3.756098 3.563000"
+ ]
+ },
+ "execution_count": 136,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"# Challenge: \n",
"# What is the mean tip, per day, for male vs. female?\n",
@@ -972,7 +3194,7 @@
"def day_mean(df):\n",
" # Hint: you will need to group by \"day\"\n",
" # in this function, then get the mean tip. \n",
- " pass\n",
+ " return df.groupby(\"day\").tip.mean()\n",
"\n",
"\n",
"tips.groupby(\"sex\").apply(day_mean)"
@@ -998,9 +3220,434 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 138,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " day | \n",
+ " sex | \n",
+ " total_bill | \n",
+ " tip | \n",
+ " size | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " Fri | \n",
+ " Female | \n",
+ " 14.145556 | \n",
+ " 2.781111 | \n",
+ " 2.111111 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " Fri | \n",
+ " Male | \n",
+ " 11.622500 | \n",
+ " 1.625000 | \n",
+ " 1.750000 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " Fri | \n",
+ " foo | \n",
+ " 25.346667 | \n",
+ " 3.405000 | \n",
+ " 2.333333 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " Sat | \n",
+ " Female | \n",
+ " 19.680357 | \n",
+ " 2.801786 | \n",
+ " 2.250000 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " Sat | \n",
+ " Male | \n",
+ " 15.302000 | \n",
+ " 1.681500 | \n",
+ " 2.200000 | \n",
+ "
\n",
+ " \n",
+ " | 5 | \n",
+ " Sat | \n",
+ " foo | \n",
+ " 23.623333 | \n",
+ " 3.803077 | \n",
+ " 2.871795 | \n",
+ "
\n",
+ " \n",
+ " | 6 | \n",
+ " Sun | \n",
+ " Female | \n",
+ " 19.932105 | \n",
+ " 3.374211 | \n",
+ " 2.947368 | \n",
+ "
\n",
+ " \n",
+ " | 7 | \n",
+ " Sun | \n",
+ " Male | \n",
+ " 15.358125 | \n",
+ " 1.830000 | \n",
+ " 2.625000 | \n",
+ "
\n",
+ " \n",
+ " | 8 | \n",
+ " Sun | \n",
+ " foo | \n",
+ " 24.456585 | \n",
+ " 3.756098 | \n",
+ " 2.878049 | \n",
+ "
\n",
+ " \n",
+ " | 9 | \n",
+ " Thur | \n",
+ " Female | \n",
+ " 16.715312 | \n",
+ " 2.575625 | \n",
+ " 2.468750 | \n",
+ "
\n",
+ " \n",
+ " | 10 | \n",
+ " Thur | \n",
+ " Male | \n",
+ " 12.128000 | \n",
+ " 1.815000 | \n",
+ " 2.000000 | \n",
+ "
\n",
+ " \n",
+ " | 11 | \n",
+ " Thur | \n",
+ " foo | \n",
+ " 22.008000 | \n",
+ " 3.563000 | \n",
+ " 2.650000 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " day sex total_bill tip size\n",
+ "0 Fri Female 14.145556 2.781111 2.111111\n",
+ "1 Fri Male 11.622500 1.625000 1.750000\n",
+ "2 Fri foo 25.346667 3.405000 2.333333\n",
+ "3 Sat Female 19.680357 2.801786 2.250000\n",
+ "4 Sat Male 15.302000 1.681500 2.200000\n",
+ "5 Sat foo 23.623333 3.803077 2.871795\n",
+ "6 Sun Female 19.932105 3.374211 2.947368\n",
+ "7 Sun Male 15.358125 1.830000 2.625000\n",
+ "8 Sun foo 24.456585 3.756098 2.878049\n",
+ "9 Thur Female 16.715312 2.575625 2.468750\n",
+ "10 Thur Male 12.128000 1.815000 2.000000\n",
+ "11 Thur foo 22.008000 3.563000 2.650000"
+ ]
+ },
+ "execution_count": 138,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "tips.groupby(['day','sex']).mean().reset_index()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 137,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "[(('Female', 'Fri'), total_bill tip sex smoker day time size\n",
+ " 92 5.75 1.00 Female Yes Fri Dinner 2\n",
+ " 93 16.32 4.30 Female Yes Fri Dinner 2\n",
+ " 94 22.75 3.25 Female No Fri Dinner 2\n",
+ " 100 11.35 2.50 Female Yes Fri Dinner 2\n",
+ " 101 15.38 3.00 Female Yes Fri Dinner 2\n",
+ " 221 13.42 3.48 Female Yes Fri Lunch 2\n",
+ " 223 15.98 3.00 Female No Fri Lunch 3\n",
+ " 225 16.27 2.50 Female Yes Fri Lunch 2\n",
+ " 226 10.09 2.00 Female Yes Fri Lunch 2),\n",
+ " (('Female', 'Sat'), total_bill tip sex smoker day time size\n",
+ " 21 20.29 2.75 Female No Sat Dinner 2\n",
+ " 22 15.77 2.23 Female No Sat Dinner 2\n",
+ " 29 19.65 3.00 Female No Sat Dinner 2\n",
+ " 32 15.06 3.00 Female No Sat Dinner 2\n",
+ " 33 20.69 2.45 Female No Sat Dinner 4\n",
+ " 37 16.93 3.07 Female No Sat Dinner 3\n",
+ " 57 26.41 1.50 Female No Sat Dinner 2\n",
+ " 66 16.45 2.47 Female No Sat Dinner 2\n",
+ " 67 3.07 1.00 Female Yes Sat Dinner 1\n",
+ " 71 17.07 3.00 Female No Sat Dinner 3\n",
+ " 72 26.86 3.14 Female Yes Sat Dinner 2\n",
+ " 73 25.28 5.00 Female Yes Sat Dinner 2\n",
+ " 74 14.73 2.20 Female No Sat Dinner 2\n",
+ " 102 44.30 2.50 Female Yes Sat Dinner 3\n",
+ " 103 22.42 3.48 Female Yes Sat Dinner 2\n",
+ " 104 20.92 4.08 Female No Sat Dinner 2\n",
+ " 109 14.31 4.00 Female Yes Sat Dinner 2\n",
+ " 111 7.25 1.00 Female No Sat Dinner 1\n",
+ " 168 10.59 1.61 Female Yes Sat Dinner 2\n",
+ " 169 10.63 2.00 Female Yes Sat Dinner 2\n",
+ " 209 12.76 2.23 Female Yes Sat Dinner 2\n",
+ " 213 13.27 2.50 Female Yes Sat Dinner 2\n",
+ " 214 28.17 6.50 Female Yes Sat Dinner 3\n",
+ " 215 12.90 1.10 Female Yes Sat Dinner 2\n",
+ " 219 30.14 3.09 Female Yes Sat Dinner 4\n",
+ " 229 22.12 2.88 Female Yes Sat Dinner 2\n",
+ " 238 35.83 4.67 Female No Sat Dinner 3\n",
+ " 240 27.18 2.00 Female Yes Sat Dinner 2),\n",
+ " (('Female', 'Sun'), total_bill tip sex smoker day time size\n",
+ " 0 16.99 1.01 Female No Sun Dinner 2\n",
+ " 2 21.01 3.50 Female No Sun Dinner 3\n",
+ " 4 24.59 3.61 Female No Sun Dinner 4\n",
+ " 11 35.26 5.00 Female No Sun Dinner 4\n",
+ " 14 14.83 3.02 Female No Sun Dinner 2\n",
+ " 16 10.33 1.67 Female No Sun Dinner 3\n",
+ " 18 16.97 3.50 Female No Sun Dinner 3\n",
+ " 51 10.29 2.60 Female No Sun Dinner 2\n",
+ " 52 34.81 5.20 Female No Sun Dinner 4\n",
+ " 114 25.71 4.00 Female No Sun Dinner 3\n",
+ " 115 17.31 3.50 Female No Sun Dinner 2\n",
+ " 155 29.85 5.14 Female No Sun Dinner 5\n",
+ " 157 25.00 3.75 Female No Sun Dinner 4\n",
+ " 158 13.39 2.61 Female No Sun Dinner 2\n",
+ " 162 16.21 2.00 Female No Sun Dinner 3\n",
+ " 164 17.51 3.00 Female Yes Sun Dinner 2\n",
+ " 178 9.60 4.00 Female Yes Sun Dinner 2\n",
+ " 186 20.90 3.50 Female Yes Sun Dinner 3\n",
+ " 188 18.15 3.50 Female Yes Sun Dinner 3),\n",
+ " (('Female', 'Thur'), total_bill tip sex smoker day time size\n",
+ " 82 10.07 1.83 Female No Thur Lunch 1\n",
+ " 85 34.83 5.17 Female No Thur Lunch 4\n",
+ " 117 10.65 1.50 Female No Thur Lunch 2\n",
+ " 118 12.43 1.80 Female No Thur Lunch 2\n",
+ " 119 24.08 2.92 Female No Thur Lunch 4\n",
+ " 121 13.42 1.68 Female No Thur Lunch 2\n",
+ " 124 12.48 2.52 Female No Thur Lunch 2\n",
+ " 125 29.80 4.20 Female No Thur Lunch 6\n",
+ " 127 14.52 2.00 Female No Thur Lunch 2\n",
+ " 128 11.38 2.00 Female No Thur Lunch 2\n",
+ " 131 20.27 2.83 Female No Thur Lunch 2\n",
+ " 132 11.17 1.50 Female No Thur Lunch 2\n",
+ " 133 12.26 2.00 Female No Thur Lunch 2\n",
+ " 134 18.26 3.25 Female No Thur Lunch 2\n",
+ " 135 8.51 1.25 Female No Thur Lunch 2\n",
+ " 136 10.33 2.00 Female No Thur Lunch 2\n",
+ " 137 14.15 2.00 Female No Thur Lunch 2\n",
+ " 139 13.16 2.75 Female No Thur Lunch 2\n",
+ " 140 17.47 3.50 Female No Thur Lunch 2\n",
+ " 143 27.05 5.00 Female No Thur Lunch 6\n",
+ " 144 16.43 2.30 Female No Thur Lunch 2\n",
+ " 145 8.35 1.50 Female No Thur Lunch 2\n",
+ " 146 18.64 1.36 Female No Thur Lunch 3\n",
+ " 147 11.87 1.63 Female No Thur Lunch 2\n",
+ " 191 19.81 4.19 Female Yes Thur Lunch 2\n",
+ " 197 43.11 5.00 Female Yes Thur Lunch 4\n",
+ " 198 13.00 2.00 Female Yes Thur Lunch 2\n",
+ " 201 12.74 2.01 Female Yes Thur Lunch 2\n",
+ " 202 13.00 2.00 Female Yes Thur Lunch 2\n",
+ " 203 16.40 2.50 Female Yes Thur Lunch 2\n",
+ " 205 16.47 3.23 Female Yes Thur Lunch 3\n",
+ " 243 18.78 3.00 Female No Thur Dinner 2),\n",
+ " (('Male', 'Fri'), total_bill tip sex smoker day time size\n",
+ " 97 12.03 1.50 Male Yes Fri Dinner 2\n",
+ " 99 12.46 1.50 Male No Fri Dinner 2\n",
+ " 222 8.58 1.92 Male Yes Fri Lunch 1\n",
+ " 224 13.42 1.58 Male Yes Fri Lunch 2),\n",
+ " (('Male', 'Sat'), total_bill tip sex smoker day time size\n",
+ " 26 13.37 2.00 Male No Sat Dinner 2\n",
+ " 27 12.69 2.00 Male No Sat Dinner 2\n",
+ " 30 9.55 1.45 Male No Sat Dinner 2\n",
+ " 36 16.31 2.00 Male No Sat Dinner 3\n",
+ " 58 11.24 1.76 Male Yes Sat Dinner 2\n",
+ " 61 13.81 2.00 Male Yes Sat Dinner 2\n",
+ " 62 11.02 1.98 Male Yes Sat Dinner 2\n",
+ " 70 12.02 1.97 Male No Sat Dinner 2\n",
+ " 75 10.51 1.25 Male No Sat Dinner 2\n",
+ " 105 15.36 1.64 Male Yes Sat Dinner 2\n",
+ " 210 30.06 2.00 Male Yes Sat Dinner 3\n",
+ " 217 11.59 1.50 Male Yes Sat Dinner 2\n",
+ " 218 7.74 1.44 Male Yes Sat Dinner 2\n",
+ " 230 24.01 2.00 Male Yes Sat Dinner 4\n",
+ " 233 10.77 1.47 Male No Sat Dinner 2\n",
+ " 235 10.07 1.25 Male No Sat Dinner 2\n",
+ " 236 12.60 1.00 Male Yes Sat Dinner 2\n",
+ " 237 32.83 1.17 Male Yes Sat Dinner 2\n",
+ " 241 22.67 2.00 Male Yes Sat Dinner 2\n",
+ " 242 17.82 1.75 Male No Sat Dinner 2),\n",
+ " (('Male', 'Sun'), total_bill tip sex smoker day time size\n",
+ " 1 10.34 1.66 Male No Sun Dinner 3\n",
+ " 6 8.77 2.00 Male No Sun Dinner 2\n",
+ " 8 15.04 1.96 Male No Sun Dinner 2\n",
+ " 10 10.27 1.71 Male No Sun Dinner 2\n",
+ " 12 15.42 1.57 Male No Sun Dinner 2\n",
+ " 43 9.68 1.32 Male No Sun Dinner 2\n",
+ " 53 9.94 1.56 Male No Sun Dinner 2\n",
+ " 151 13.13 2.00 Male No Sun Dinner 2\n",
+ " 153 24.55 2.00 Male No Sun Dinner 4\n",
+ " 154 19.77 2.00 Male No Sun Dinner 4\n",
+ " 159 16.49 2.00 Male No Sun Dinner 4\n",
+ " 163 13.81 2.00 Male No Sun Dinner 2\n",
+ " 176 17.89 2.00 Male Yes Sun Dinner 2\n",
+ " 177 14.48 2.00 Male Yes Sun Dinner 2\n",
+ " 187 30.46 2.00 Male Yes Sun Dinner 5\n",
+ " 190 15.69 1.50 Male Yes Sun Dinner 2),\n",
+ " (('Male', 'Thur'), total_bill tip sex smoker day time size\n",
+ " 86 13.03 2.00 Male No Thur Lunch 2\n",
+ " 123 15.95 2.00 Male No Thur Lunch 2\n",
+ " 126 8.52 1.48 Male No Thur Lunch 2\n",
+ " 130 19.08 1.50 Male No Thur Lunch 2\n",
+ " 138 16.00 2.00 Male Yes Thur Lunch 2\n",
+ " 148 9.78 1.73 Male No Thur Lunch 2\n",
+ " 149 7.51 2.00 Male No Thur Lunch 2\n",
+ " 195 7.56 1.44 Male No Thur Lunch 2\n",
+ " 196 10.34 2.00 Male Yes Thur Lunch 2\n",
+ " 199 13.51 2.00 Male Yes Thur Lunch 2),\n",
+ " (('foo', 'Fri'), total_bill tip sex smoker day time size\n",
+ " 90 28.97 3.00 foo Yes Fri Dinner 2\n",
+ " 91 22.49 3.50 foo No Fri Dinner 2\n",
+ " 95 40.17 4.73 foo Yes Fri Dinner 4\n",
+ " 96 27.28 4.00 foo Yes Fri Dinner 2\n",
+ " 98 21.01 3.00 foo Yes Fri Dinner 2\n",
+ " 220 12.16 2.20 foo Yes Fri Lunch 2),\n",
+ " (('foo', 'Sat'), total_bill tip sex smoker day time size\n",
+ " 19 20.65 3.35 foo No Sat Dinner 3\n",
+ " 20 17.92 4.08 foo No Sat Dinner 2\n",
+ " 23 39.42 7.58 foo No Sat Dinner 4\n",
+ " 24 19.82 3.18 foo No Sat Dinner 2\n",
+ " 25 17.81 2.34 foo No Sat Dinner 4\n",
+ " 28 21.70 4.30 foo No Sat Dinner 2\n",
+ " 31 18.35 2.50 foo No Sat Dinner 4\n",
+ " 34 17.78 3.27 foo No Sat Dinner 2\n",
+ " 35 24.06 3.60 foo No Sat Dinner 3\n",
+ " 38 18.69 2.31 foo No Sat Dinner 3\n",
+ " 39 31.27 5.00 foo No Sat Dinner 3\n",
+ " 40 16.04 2.24 foo No Sat Dinner 3\n",
+ " 56 38.01 3.00 foo Yes Sat Dinner 4\n",
+ " 59 48.27 6.73 foo No Sat Dinner 4\n",
+ " 60 20.29 3.21 foo Yes Sat Dinner 2\n",
+ " 63 18.29 3.76 foo Yes Sat Dinner 4\n",
+ " 64 17.59 2.64 foo No Sat Dinner 3\n",
+ " 65 20.08 3.15 foo No Sat Dinner 3\n",
+ " 68 20.23 2.01 foo No Sat Dinner 2\n",
+ " 69 15.01 2.09 foo Yes Sat Dinner 2\n",
+ " 76 17.92 3.08 foo Yes Sat Dinner 2\n",
+ " 106 20.49 4.06 foo Yes Sat Dinner 2\n",
+ " 107 25.21 4.29 foo Yes Sat Dinner 2\n",
+ " 108 18.24 3.76 foo No Sat Dinner 2\n",
+ " 110 14.00 3.00 foo No Sat Dinner 2\n",
+ " 170 50.81 10.00 foo Yes Sat Dinner 3\n",
+ " 171 15.81 3.16 foo Yes Sat Dinner 2\n",
+ " 206 26.59 3.41 foo Yes Sat Dinner 3\n",
+ " 207 38.73 3.00 foo Yes Sat Dinner 4\n",
+ " 208 24.27 2.03 foo Yes Sat Dinner 2\n",
+ " 211 25.89 5.16 foo Yes Sat Dinner 4\n",
+ " 212 48.33 9.00 foo No Sat Dinner 4\n",
+ " 216 28.15 3.00 foo Yes Sat Dinner 5\n",
+ " 227 20.45 3.00 foo No Sat Dinner 4\n",
+ " 228 13.28 2.72 foo No Sat Dinner 2\n",
+ " 231 15.69 3.00 foo Yes Sat Dinner 3\n",
+ " 232 11.61 3.39 foo No Sat Dinner 2\n",
+ " 234 15.53 3.00 foo Yes Sat Dinner 2\n",
+ " 239 29.03 5.92 foo No Sat Dinner 3),\n",
+ " (('foo', 'Sun'), total_bill tip sex smoker day time size\n",
+ " 3 23.68 3.31 foo No Sun Dinner 2\n",
+ " 5 25.29 4.71 foo No Sun Dinner 4\n",
+ " 7 26.88 3.12 foo No Sun Dinner 4\n",
+ " 9 14.78 3.23 foo No Sun Dinner 2\n",
+ " 13 18.43 3.00 foo No Sun Dinner 4\n",
+ " 15 21.58 3.92 foo No Sun Dinner 2\n",
+ " 17 16.29 3.71 foo No Sun Dinner 3\n",
+ " 41 17.46 2.54 foo No Sun Dinner 2\n",
+ " 42 13.94 3.06 foo No Sun Dinner 2\n",
+ " 44 30.40 5.60 foo No Sun Dinner 4\n",
+ " 45 18.29 3.00 foo No Sun Dinner 2\n",
+ " 46 22.23 5.00 foo No Sun Dinner 2\n",
+ " 47 32.40 6.00 foo No Sun Dinner 4\n",
+ " 48 28.55 2.05 foo No Sun Dinner 3\n",
+ " 49 18.04 3.00 foo No Sun Dinner 2\n",
+ " 50 12.54 2.50 foo No Sun Dinner 2\n",
+ " 54 25.56 4.34 foo No Sun Dinner 4\n",
+ " 55 19.49 3.51 foo No Sun Dinner 2\n",
+ " 112 38.07 4.00 foo No Sun Dinner 3\n",
+ " 113 23.95 2.55 foo No Sun Dinner 2\n",
+ " 116 29.93 5.07 foo No Sun Dinner 4\n",
+ " 150 14.07 2.50 foo No Sun Dinner 2\n",
+ " 152 17.26 2.74 foo No Sun Dinner 3\n",
+ " 156 48.17 5.00 foo No Sun Dinner 6\n",
+ " 160 21.50 3.50 foo No Sun Dinner 4\n",
+ " 161 12.66 2.50 foo No Sun Dinner 2\n",
+ " 165 24.52 3.48 foo No Sun Dinner 3\n",
+ " 166 20.76 2.24 foo No Sun Dinner 2\n",
+ " 167 31.71 4.50 foo No Sun Dinner 4\n",
+ " 172 7.25 5.15 foo Yes Sun Dinner 2\n",
+ " 173 31.85 3.18 foo Yes Sun Dinner 2\n",
+ " 174 16.82 4.00 foo Yes Sun Dinner 2\n",
+ " 175 32.90 3.11 foo Yes Sun Dinner 2\n",
+ " 179 34.63 3.55 foo Yes Sun Dinner 2\n",
+ " 180 34.65 3.68 foo Yes Sun Dinner 4\n",
+ " 181 23.33 5.65 foo Yes Sun Dinner 2\n",
+ " 182 45.35 3.50 foo Yes Sun Dinner 3\n",
+ " 183 23.17 6.50 foo Yes Sun Dinner 4\n",
+ " 184 40.55 3.00 foo Yes Sun Dinner 2\n",
+ " 185 20.69 5.00 foo No Sun Dinner 5\n",
+ " 189 23.10 4.00 foo Yes Sun Dinner 3),\n",
+ " (('foo', 'Thur'), total_bill tip sex smoker day time size\n",
+ " 77 27.20 4.00 foo No Thur Lunch 4\n",
+ " 78 22.76 3.00 foo No Thur Lunch 2\n",
+ " 79 17.29 2.71 foo No Thur Lunch 2\n",
+ " 80 19.44 3.00 foo Yes Thur Lunch 2\n",
+ " 81 16.66 3.40 foo No Thur Lunch 2\n",
+ " 83 32.68 5.00 foo Yes Thur Lunch 2\n",
+ " 84 15.98 2.03 foo No Thur Lunch 2\n",
+ " 87 18.28 4.00 foo No Thur Lunch 2\n",
+ " 88 24.71 5.85 foo No Thur Lunch 2\n",
+ " 89 21.16 3.00 foo No Thur Lunch 2\n",
+ " 120 11.69 2.31 foo No Thur Lunch 2\n",
+ " 122 14.26 2.50 foo No Thur Lunch 2\n",
+ " 129 22.82 2.18 foo No Thur Lunch 3\n",
+ " 141 34.30 6.70 foo No Thur Lunch 6\n",
+ " 142 41.19 5.00 foo No Thur Lunch 5\n",
+ " 192 28.44 2.56 foo Yes Thur Lunch 2\n",
+ " 193 15.48 2.02 foo Yes Thur Lunch 2\n",
+ " 194 16.58 4.00 foo Yes Thur Lunch 2\n",
+ " 200 18.71 4.00 foo Yes Thur Lunch 3\n",
+ " 204 20.53 4.00 foo Yes Thur Lunch 4)]"
+ ]
+ },
+ "execution_count": 137,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"# Take a look at the structure of the multiple groupby!\n",
"\n",
@@ -1073,6 +3720,212 @@
"\n"
]
},
+ {
+ "cell_type": "code",
+ "execution_count": 149,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "A 1.0\n",
+ "B 4.0\n",
+ "C NaN\n",
+ "Name: 0, dtype: float64\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/opt/conda/lib/python3.7/site-packages/ipykernel_launcher.py:3: FutureWarning: Sorting because non-concatenation axis is not aligned. A future version\n",
+ "of pandas will change to not sort by default.\n",
+ "\n",
+ "To accept the future behavior, pass 'sort=False'.\n",
+ "\n",
+ "To retain the current behavior and silence the warning, pass 'sort=True'.\n",
+ "\n",
+ " This is separate from the ipykernel package so we can avoid doing imports until\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " A | \n",
+ " B | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 1 | \n",
+ " 4 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 2 | \n",
+ " 5 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 3 | \n",
+ " 6 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " A B\n",
+ "0 1 4\n",
+ "1 2 5\n",
+ "2 3 6"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " A | \n",
+ " C | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 4 | \n",
+ " 7 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " A C\n",
+ "0 4 7"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " A | \n",
+ " B | \n",
+ " C | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 1 | \n",
+ " 4.0 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 2 | \n",
+ " 5.0 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 3 | \n",
+ " 6.0 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 4 | \n",
+ " NaN | \n",
+ " 7.0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " A B C\n",
+ "0 1 4.0 NaN\n",
+ "1 2 5.0 NaN\n",
+ "2 3 6.0 NaN\n",
+ "3 4 NaN 7.0"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "df1 = pd.DataFrame({\"A\": pd.Series([1,2,3]), \"B\": pd.Series([4,5,6])})\n",
+ "df2 = pd.DataFrame({\"A\": pd.Series([4]), \"C\": pd.Series([7])})\n",
+ "df = pd.concat([df1,df2]).reset_index(drop=True)\n",
+ "df = pd.concat([df1,df2],ignore_index=True,axis=0,sort=False)\n",
+ "\n",
+ "\n",
+ "print(df.loc[0])\n",
+ "\n",
+ "display(df1,df2,df)"
+ ]
+ },
{
"cell_type": "markdown",
"metadata": {
@@ -1108,6 +3961,30 @@
"and what will happen if \"how\" changes to each of the other options?"
]
},
+ {
+ "cell_type": "code",
+ "execution_count": 148,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " A B C\n",
+ "0 1 4.0 NaN\n",
+ "1 2 5.0 NaN\n",
+ "2 3 6.0 NaN\n",
+ "3 4 NaN 7.0\n"
+ ]
+ }
+ ],
+ "source": [
+ "df1 = pd.DataFrame({\"A\": [1, 2, 3], \"B\": [4, 5, 6]})\n",
+ "df2 = pd.DataFrame({\"A\": [4], \"C\": [7]})\n",
+ "df = df1.merge(df2, on=\"A\", how=\"outer\")  # outer join keeps every value of A found in either frame\n",
+ "print(df)"
+ ]
+ },
{
"cell_type": "markdown",
"metadata": {},
@@ -1133,9 +4010,70 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 200,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " screenname | \n",
+ " id_str | \n",
+ " text | \n",
+ " hashtags | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " nandanrao | \n",
+ " 928374987 | \n",
+ " Woah, pandas is so much fun #worldrocked #jawd... | \n",
+ " [worldrocked, jawdrop, ml] | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " om | \n",
+ " 98214039 | \n",
+ " I eat linear models for breakfast #datascience... | \n",
+ " [datascience, ml, crossfit] | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " screenname id_str text \\\n",
+ "0 nandanrao 928374987 Woah, pandas is so much fun #worldrocked #jawd... \n",
+ "1 om 98214039 I eat linear models for breakfast #datascience... \n",
+ "\n",
+ " hashtags \n",
+ "0 [worldrocked, jawdrop, ml] \n",
+ "1 [datascience, ml, crossfit] "
+ ]
+ },
+ "execution_count": 200,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"raw_tweets = [{ \"screenname\": \"nandanrao\",\n",
" \"id_str\": \"928374987\",\n",
@@ -1165,9 +4103,63 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 201,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " screenname | \n",
+ " id_str | \n",
+ " text | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " nandanrao | \n",
+ " 928374987 | \n",
+ " Woah, pandas is so much fun #worldrocked #jawd... | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " om | \n",
+ " 98214039 | \n",
+ " I eat linear models for breakfast #datascience... | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " screenname id_str text\n",
+ "0 nandanrao 928374987 Woah, pandas is so much fun #worldrocked #jawd...\n",
+ "1 om 98214039 I eat linear models for breakfast #datascience..."
+ ]
+ },
+ "execution_count": 201,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"tweets = pd.DataFrame(raw_tweets, columns = [\"screenname\", \"id_str\", \"text\"])\n",
"tweets"
@@ -1175,10 +4167,87 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 202,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " id_str | \n",
+ " hashtag | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 928374987 | \n",
+ " worldrocked | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 928374987 | \n",
+ " jawdrop | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 928374987 | \n",
+ " ml | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 98214039 | \n",
+ " datascience | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 98214039 | \n",
+ " ml | \n",
+ "
\n",
+ " \n",
+ " | 5 | \n",
+ " 98214039 | \n",
+ " crossfit | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " id_str hashtag\n",
+ "0 928374987 worldrocked\n",
+ "1 928374987 jawdrop\n",
+ "2 928374987 ml\n",
+ "3 98214039 datascience\n",
+ "4 98214039 ml\n",
+ "5 98214039 crossfit"
+ ]
+ },
+ "execution_count": 202,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
+ "# REALLY COOL IDEA!!\n",
+ "\n",
"tags_and_ids = [(t['id_str'], tag) \n",
" for t in raw_tweets \n",
" for tag in t['hashtags']]\n",
@@ -1190,15 +4259,139 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
+ "execution_count": 203,
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " screenname | \n",
+ " id_str | \n",
+ " text | \n",
+ " hashtag | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " nandanrao | \n",
+ " 928374987 | \n",
+ " Woah, pandas is so much fun #worldrocked #jawd... | \n",
+ " worldrocked | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " nandanrao | \n",
+ " 928374987 | \n",
+ " Woah, pandas is so much fun #worldrocked #jawd... | \n",
+ " jawdrop | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " nandanrao | \n",
+ " 928374987 | \n",
+ " Woah, pandas is so much fun #worldrocked #jawd... | \n",
+ " ml | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " om | \n",
+ " 98214039 | \n",
+ " I eat linear models for breakfast #datascience... | \n",
+ " datascience | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " om | \n",
+ " 98214039 | \n",
+ " I eat linear models for breakfast #datascience... | \n",
+ " ml | \n",
+ "
\n",
+ " \n",
+ " | 5 | \n",
+ " om | \n",
+ " 98214039 | \n",
+ " I eat linear models for breakfast #datascience... | \n",
+ " crossfit | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " screenname id_str text \\\n",
+ "0 nandanrao 928374987 Woah, pandas is so much fun #worldrocked #jawd... \n",
+ "1 nandanrao 928374987 Woah, pandas is so much fun #worldrocked #jawd... \n",
+ "2 nandanrao 928374987 Woah, pandas is so much fun #worldrocked #jawd... \n",
+ "3 om 98214039 I eat linear models for breakfast #datascience... \n",
+ "4 om 98214039 I eat linear models for breakfast #datascience... \n",
+ "5 om 98214039 I eat linear models for breakfast #datascience... \n",
+ "\n",
+ " hashtag \n",
+ "0 worldrocked \n",
+ "1 jawdrop \n",
+ "2 ml \n",
+ "3 datascience \n",
+ "4 ml \n",
+ "5 crossfit "
+ ]
+ },
+ "execution_count": 203,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"df = tweets.merge(hashtags, how='left')\n",
"\n",
"df"
]
},
+ {
+ "cell_type": "code",
+ "execution_count": 205,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "ml 2\n",
+ "datascience 1\n",
+ "crossfit 1\n",
+ "jawdrop 1\n",
+ "worldrocked 1\n",
+ "Name: hashtag, dtype: int64"
+ ]
+ },
+ "execution_count": 205,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.hashtag.value_counts()"
+ ]
+ },
{
"cell_type": "markdown",
"metadata": {},
@@ -1218,6 +4411,958 @@
"\n",
"*Needless to say that eyeballing is OK for making sure your code makes sense, but will not result in full credits for the project. We want a fully automated code. To carry out the project successfully you need to use most the attributes and methods described earlier. The last one is a little tricky*"
]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 156,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Product | \n",
+ " Price | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " tomato | \n",
+ " 2.1 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " potato | \n",
+ " 3.4 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " apple | \n",
+ " 1.2 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " orange | \n",
+ " 4.3 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " banana | \n",
+ " 5.2 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Product Price\n",
+ "0 tomato 2.1\n",
+ "1 potato 3.4\n",
+ "2 apple 1.2\n",
+ "3 orange 4.3\n",
+ "4 banana 5.2"
+ ]
+ },
+ "execution_count": 156,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "prices=pd.read_csv(\"supermarket_prices.csv\")\n",
+ "prices.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 157,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Buyer | \n",
+ " Product | \n",
+ " Quantity | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " Jackson | \n",
+ " apple | \n",
+ " 4 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " Jackson | \n",
+ " apple | \n",
+ " 9 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " John | \n",
+ " orange | \n",
+ " 9 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " John | \n",
+ " potato | \n",
+ " 10 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " Tom | \n",
+ " tomato | \n",
+ " 4 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Buyer Product Quantity\n",
+ "0 Jackson apple 4\n",
+ "1 Jackson apple 9\n",
+ "2 John orange 9\n",
+ "3 John potato 10\n",
+ "4 Tom tomato 4"
+ ]
+ },
+ "execution_count": 157,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "transactions=pd.read_csv(\"supermarket_transactions.csv\")\n",
+ "transactions.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 158,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Buyer | \n",
+ " Product | \n",
+ " Quantity | \n",
+ " Price | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " Jackson | \n",
+ " apple | \n",
+ " 4 | \n",
+ " 1.2 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " Jackson | \n",
+ " apple | \n",
+ " 9 | \n",
+ " 1.2 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " John | \n",
+ " orange | \n",
+ " 9 | \n",
+ " 4.3 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " John | \n",
+ " potato | \n",
+ " 10 | \n",
+ " 3.4 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " Tom | \n",
+ " tomato | \n",
+ " 4 | \n",
+ " 2.1 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Buyer Product Quantity Price\n",
+ "0 Jackson apple 4 1.2\n",
+ "1 Jackson apple 9 1.2\n",
+ "2 John orange 9 4.3\n",
+ "3 John potato 10 3.4\n",
+ "4 Tom tomato 4 2.1"
+ ]
+ },
+ "execution_count": 158,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df=transactions.join(prices.set_index(\"Product\"), on=\"Product\", how=\"left\")  # look up each row's Price by Product\n",
+ "df.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 162,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Buyer\n",
+ "Emma 81\n",
+ "Jackson 70\n",
+ "John 122\n",
+ "Liam 81\n",
+ "Lucas 62\n",
+ "Sandra 78\n",
+ "Sophia 61\n",
+ "Tom 49\n",
+ "Name: Quantity, dtype: int64"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "# Total quantity of items purchased by each buyer.\n",
+ "display(\n",
+ "    df.groupby(\"Buyer\")[\"Quantity\"].sum()\n",
+ ")\n",
+ "\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 163,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Buyer Product\n",
+ "Emma apple 25\n",
+ " banana 26\n",
+ " potato 14\n",
+ " tomato 16\n",
+ "Jackson apple 18\n",
+ " orange 28\n",
+ " potato 8\n",
+ " tomato 16\n",
+ "John apple 7\n",
+ " banana 28\n",
+ " orange 46\n",
+ " potato 18\n",
+ " tomato 23\n",
+ "Liam apple 21\n",
+ " banana 16\n",
+ " orange 16\n",
+ " potato 21\n",
+ " tomato 7\n",
+ "Lucas apple 14\n",
+ " banana 3\n",
+ " orange 17\n",
+ " potato 9\n",
+ " tomato 19\n",
+ "Sandra banana 2\n",
+ " orange 37\n",
+ " potato 38\n",
+ " tomato 1\n",
+ "Sophia apple 14\n",
+ " banana 13\n",
+ " orange 7\n",
+ " potato 14\n",
+ " tomato 13\n",
+ "Tom apple 18\n",
+ " banana 6\n",
+ " potato 16\n",
+ " tomato 9\n",
+ "Name: Quantity, dtype: int64"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "# Quantity bought by each buyer, broken down by product\n",
+ "# (grouping on two keys yields a Series indexed by Buyer and Product).\n",
+ "display(df.groupby([\"Buyer\",\"Product\"])[\"Quantity\"].sum())"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 164,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df[\"bill\"]=df[\"Quantity\"]*df[\"Price\"]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 165,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Index(['Buyer', 'Product', 'Quantity', 'Price', 'bill'], dtype='object')"
+ ]
+ },
+ "execution_count": 165,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.columns"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 166,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Buyer | \n",
+ " Product | \n",
+ " Quantity | \n",
+ " Price | \n",
+ " bill | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " Jackson | \n",
+ " apple | \n",
+ " 4 | \n",
+ " 1.2 | \n",
+ " 4.8 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " Jackson | \n",
+ " apple | \n",
+ " 9 | \n",
+ " 1.2 | \n",
+ " 10.8 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " John | \n",
+ " orange | \n",
+ " 9 | \n",
+ " 4.3 | \n",
+ " 38.7 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " John | \n",
+ " potato | \n",
+ " 10 | \n",
+ " 3.4 | \n",
+ " 34.0 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " Tom | \n",
+ " tomato | \n",
+ " 4 | \n",
+ " 2.1 | \n",
+ " 8.4 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Buyer Product Quantity Price bill\n",
+ "0 Jackson apple 4 1.2 4.8\n",
+ "1 Jackson apple 9 1.2 10.8\n",
+ "2 John orange 9 4.3 38.7\n",
+ "3 John potato 10 3.4 34.0\n",
+ "4 Tom tomato 4 2.1 8.4"
+ ]
+ },
+ "execution_count": 166,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 181,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Buyer\n",
+ "Emma 246.4\n",
+ "Jackson 202.8\n",
+ "John 461.3\n",
+ "Liam 263.3\n",
+ "Lucas 176.0\n",
+ "Sandra 300.8\n",
+ "Sophia 189.4\n",
+ "Tom 126.1\n",
+ "Name: bill, dtype: float64"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "# Total amount billed to each buyer across all products.\n",
+ "total_spent=df.groupby(\"Buyer\")[\"bill\"].sum()\n",
+ "display(total_spent)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 185,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Buyer\n",
+ "Emma 135.2\n",
+ "John 145.6\n",
+ "Liam 83.2\n",
+ "Lucas 15.6\n",
+ "Sandra 10.4\n",
+ "Sophia 67.6\n",
+ "Tom 31.2\n",
+ "Name: bill, dtype: float64"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "banana_spent=df.loc[df[\"Product\"]==\"banana\"].groupby(\"Buyer\")[\"bill\"].sum()  # banana-only bill per buyer\n",
+ "display(banana_spent)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 188,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Index(['bill'], dtype='object')"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/plain": [
+ "Index(['Emma', 'Jackson', 'John', 'Liam', 'Lucas', 'Sandra', 'Sophia', 'Tom'], dtype='object', name='Buyer')"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/plain": [
+ "Index(['Emma', 'John', 'Liam', 'Lucas', 'Sandra', 'Sophia', 'Tom'], dtype='object', name='Buyer')"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " bill_x | \n",
+ " bill_y | \n",
+ "
\n",
+ " \n",
+ " | Buyer | \n",
+ " | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | Emma | \n",
+ " 246.4 | \n",
+ " 135.2 | \n",
+ "
\n",
+ " \n",
+ " | Jackson | \n",
+ " 202.8 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | John | \n",
+ " 461.3 | \n",
+ " 145.6 | \n",
+ "
\n",
+ " \n",
+ " | Liam | \n",
+ " 263.3 | \n",
+ " 83.2 | \n",
+ "
\n",
+ " \n",
+ " | Lucas | \n",
+ " 176.0 | \n",
+ " 15.6 | \n",
+ "
\n",
+ " \n",
+ " | Sandra | \n",
+ " 300.8 | \n",
+ " 10.4 | \n",
+ "
\n",
+ " \n",
+ " | Sophia | \n",
+ " 189.4 | \n",
+ " 67.6 | \n",
+ "
\n",
+ " \n",
+ " | Tom | \n",
+ " 126.1 | \n",
+ " 31.2 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " bill_x bill_y\n",
+ "Buyer \n",
+ "Emma 246.4 135.2\n",
+ "Jackson 202.8 NaN\n",
+ "John 461.3 145.6\n",
+ "Liam 263.3 83.2\n",
+ "Lucas 176.0 15.6\n",
+ "Sandra 300.8 10.4\n",
+ "Sophia 189.4 67.6\n",
+ "Tom 126.1 31.2"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "# Promote both per-buyer Series to one-column DataFrames so they can be merged.\n",
+ "total_spent=pd.DataFrame(total_spent)\n",
+ "banana_spent=pd.DataFrame(banana_spent)\n",
+ "\n",
+ "# Sanity check: the column name and the Buyer index on each side.\n",
+ "display(total_spent.columns, total_spent.index, banana_spent.index)\n",
+ "\n",
+ "# Outer merge on the Buyer index keeps buyers that have no banana purchases (NaN);\n",
+ "# the clashing 'bill' columns come out as bill_x (total) and bill_y (banana).\n",
+ "spent_table=pd.merge(total_spent,banana_spent,how='outer',left_index=True,right_index=True)\n",
+ "display(spent_table)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 190,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Total_spent | \n",
+ " Banana_spent | \n",
+ "
\n",
+ " \n",
+ " | Buyer | \n",
+ " | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | Emma | \n",
+ " 246.4 | \n",
+ " 135.2 | \n",
+ "
\n",
+ " \n",
+ " | Jackson | \n",
+ " 202.8 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | John | \n",
+ " 461.3 | \n",
+ " 145.6 | \n",
+ "
\n",
+ " \n",
+ " | Liam | \n",
+ " 263.3 | \n",
+ " 83.2 | \n",
+ "
\n",
+ " \n",
+ " | Lucas | \n",
+ " 176.0 | \n",
+ " 15.6 | \n",
+ "
\n",
+ " \n",
+ " | Sandra | \n",
+ " 300.8 | \n",
+ " 10.4 | \n",
+ "
\n",
+ " \n",
+ " | Sophia | \n",
+ " 189.4 | \n",
+ " 67.6 | \n",
+ "
\n",
+ " \n",
+ " | Tom | \n",
+ " 126.1 | \n",
+ " 31.2 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Total_spent Banana_spent\n",
+ "Buyer \n",
+ "Emma 246.4 135.2\n",
+ "Jackson 202.8 NaN\n",
+ "John 461.3 145.6\n",
+ "Liam 263.3 83.2\n",
+ "Lucas 176.0 15.6\n",
+ "Sandra 300.8 10.4\n",
+ "Sophia 189.4 67.6\n",
+ "Tom 126.1 31.2"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "# Rename by name rather than by position, so re-running this cell after more columns exist does not fail.\n",
+ "spent_table=spent_table.rename(columns={\"bill_x\":\"Total_spent\",\"bill_y\":\"Banana_spent\"})\n",
+ "display(spent_table)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 191,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Total_spent | \n",
+ " Banana_spent | \n",
+ " Banana_share | \n",
+ "
\n",
+ " \n",
+ " | Buyer | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | Emma | \n",
+ " 246.4 | \n",
+ " 135.2 | \n",
+ " 0.548701 | \n",
+ "
\n",
+ " \n",
+ " | Jackson | \n",
+ " 202.8 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | John | \n",
+ " 461.3 | \n",
+ " 145.6 | \n",
+ " 0.315630 | \n",
+ "
\n",
+ " \n",
+ " | Liam | \n",
+ " 263.3 | \n",
+ " 83.2 | \n",
+ " 0.315989 | \n",
+ "
\n",
+ " \n",
+ " | Lucas | \n",
+ " 176.0 | \n",
+ " 15.6 | \n",
+ " 0.088636 | \n",
+ "
\n",
+ " \n",
+ " | Sandra | \n",
+ " 300.8 | \n",
+ " 10.4 | \n",
+ " 0.034574 | \n",
+ "
\n",
+ " \n",
+ " | Sophia | \n",
+ " 189.4 | \n",
+ " 67.6 | \n",
+ " 0.356917 | \n",
+ "
\n",
+ " \n",
+ " | Tom | \n",
+ " 126.1 | \n",
+ " 31.2 | \n",
+ " 0.247423 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Total_spent Banana_spent Banana_share\n",
+ "Buyer \n",
+ "Emma 246.4 135.2 0.548701\n",
+ "Jackson 202.8 NaN NaN\n",
+ "John 461.3 145.6 0.315630\n",
+ "Liam 263.3 83.2 0.315989\n",
+ "Lucas 176.0 15.6 0.088636\n",
+ "Sandra 300.8 10.4 0.034574\n",
+ "Sophia 189.4 67.6 0.356917\n",
+ "Tom 126.1 31.2 0.247423"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "# Fraction of each buyer's total bill spent on bananas (NaN where the buyer bought none).\n",
+ "spent_table[\"Banana_share\"]=spent_table[\"Banana_spent\"].div(spent_table[\"Total_spent\"])\n",
+ "display(spent_table)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 193,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Buyer\n",
+ "Liam 0.315989\n",
+ "Sophia 0.356917\n",
+ "Emma 0.548701\n",
+ "Name: Banana_share, dtype: float64"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "banana_kings=spent_table.Banana_share.dropna().sort_values().iloc[-3:]  # three largest shares; NaN rows dropped so they cannot shift the slice\n",
+ "display(banana_kings)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 199,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Buyer Product\n",
+ "Emma apple 30.0\n",
+ " banana 135.2\n",
+ " potato 47.6\n",
+ " tomato 33.6\n",
+ "Liam apple 25.2\n",
+ " banana 83.2\n",
+ " orange 68.8\n",
+ " potato 71.4\n",
+ " tomato 14.7\n",
+ "Sophia apple 16.8\n",
+ " banana 67.6\n",
+ " orange 30.1\n",
+ " potato 47.6\n",
+ " tomato 27.3\n",
+ "Name: bill, dtype: float64"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "# Bill per product for each of the top banana buyers.\n",
+ "display(\n",
+ "    df.loc[df[\"Buyer\"].isin(banana_kings.index)].groupby([\"Buyer\",\"Product\"])[\"bill\"].sum()\n",
+ ")\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
}
],
"metadata": {
@@ -1237,7 +5382,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.6.3"
+ "version": "3.7.3"
}
},
"nbformat": 4,