# Pandas Objects

In [None]:
import pandas as pd
import numpy as np

## Series

Create a `Series` named `s1` with the following properties:

* 7 values that are the first 7 lowercase letters (`a` through `g`).
* An index of the days of the week capitalized, starting with `Sunday`.

In [None]:
# Seven letters 'a'..'g', labelled with the days of the week starting on Sunday.
week_days = ['Sunday', 'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday']
s1 = pd.Series(list('abcdefg'), index=week_days)

In [None]:
# Show s1: a bare expression on the last line of a cell is rendered as its output.
s1

In [None]:
# Checks for s1: the values contain every letter a-g, the index contains every
# day of the week, and the first entry is 'a' labelled 'Sunday'.
v = s1.values
assert all(char in v for char in 'abcdefg')
for day in ('Sunday', 'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday'):
    # `in` on a Series tests membership in the index, not in the values.
    assert day in s1
assert s1.iloc[0] == 'a'
assert s1.index[0] == 'Sunday'

Use the `.loc` indexer to slice the `s1` `Series` by index values to create a new `Series` named `s2` with only the weekdays.

In [None]:
# Label-based slice of s1: unlike positional slicing, a .loc slice includes
# BOTH endpoints, so the result runs from 'Monday' through 'Friday'.
s2 = s1.loc['Monday':'Friday']

In [None]:
# Show s2: a bare expression on the last line of a cell is rendered as its output.
s2

In [None]:
# Checks for s2: the values contain the letters b-f and the index contains
# each of the five weekdays.
v = s2.values
assert all(char in v for char in 'bcdef')
for day in ('Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday'):
    # `in` on a Series tests membership in the index, not in the values.
    assert day in s2

Use the `.iloc` indexer to slice the `s1` `Series` in a manner that reverses its values/index. Name the new `Series` `s3`.

In [None]:
# Positional slice with a step of -1 and open bounds: walks s1 from the last
# element to the first, so values and index labels are reversed together.
s3 = s1.iloc[::-1]

In [None]:
# Show s3: a bare expression on the last line of a cell is rendered as its output.
s3

In [None]:
# s3 must be s1 back to front: the letters read g..a and the index labels
# appear in the opposite order to s1's.
reversed_letters = ''.join(s3.values)
assert reversed_letters == 'gfedcba'
assert list(s3.index) == list(s1.index)[::-1]

## DataFrame

Create a `DataFrame` named `df1` with the following properties:

* 10 rows.
* An `age` column with random ages between 0 and 100 (inclusive).
* A `cell_phone` column of randomly sampled categorical values `ios`, `android`, `windows`.
* A `gender` column of randomly sampled categorical values `f` and `m`.
* The order of columns should be `gender`, `age`, `cell_phone`.
* A row index consisting of lowercase alphabetical letters.

Use the function defined above to create this `DataFrame`.

In [None]:
def _sample_covering(options, size):
    """Randomly draw `size` values from `options` so that every option occurs.

    A plain ``np.random.choice`` can, by chance, leave an option out entirely
    (for three options and ten draws this happens in roughly 5% of runs), which
    makes the notebook's category checks on df1 fail intermittently. This
    helper redraws until each option is present at least once.

    Parameters
    ----------
    options : sequence of str
        The categorical values to sample from.
    size : int
        Number of values to draw.

    Returns
    -------
    numpy.ndarray
        Array of length `size` containing every distinct option at least once.

    Raises
    ------
    ValueError
        If `size` is smaller than the number of distinct options, in which
        case full coverage is impossible.
    """
    n_distinct = len(set(options))
    if size < n_distinct:
        raise ValueError('size must be at least the number of distinct options')
    while True:
        draw = np.random.choice(options, size)
        if np.unique(draw).size == n_distinct:
            return draw


# Row labels 'a'..'j' give the ten rows required by the exercise.
row_labels = list('abcdefghij')
n_rows = len(row_labels)

df1 = pd.DataFrame(
    {
        'gender': pd.Categorical(_sample_covering(['f', 'm'], n_rows)),
        # randint's upper bound is exclusive, so 101 makes an age of 100 possible.
        # dtype is pinned to int64 because NumPy's default integer is 32-bit on
        # some platforms/versions.
        'age': np.random.randint(0, 101, size=n_rows, dtype='int64'),
        'cell_phone': pd.Categorical(
            _sample_covering(['ios', 'android', 'windows'], n_rows)
        ),
    },
    index=row_labels,
    # Explicit column order: gender, age, cell_phone.
    columns=['gender', 'age', 'cell_phone'],
)

In [None]:
# Show df1: a bare expression on the last line of a cell is rendered as its output.
df1

In [None]:
# Checks for df1: row labels a-j, the three expected columns, their dtypes,
# full coverage of each category, and the column order.
assert all(label in df1.index for label in 'abcdefghij')
for column in ('age', 'cell_phone', 'gender'):
    assert column in df1.columns
assert df1['age'].dtype.name == 'int64'
for column in ('cell_phone', 'gender'):
    assert df1[column].dtype.name == 'category'
# Every category value must actually occur in the sampled data.
assert set(df1['gender'].unique()) == {'f', 'm'}
assert set(df1['cell_phone'].unique()) == {'windows', 'android', 'ios'}
assert list(df1.columns) == ['gender', 'age', 'cell_phone']

Create a new `DataFrame`, named `df2` with the following transformations:

* Extract rows `a` through `g`
* Extract the `age` and `gender` columns, but put `age` first.
* Reverse the rows so they run `g` to `a`.
* Add a new column named `income` holding random integer dollar amounts in the range [0, 10000].
* Add a new column named `expenses` holding random integer dollar amounts in the range [0, 10000].
* Create a new column named `profit`, computed as `income` minus `expenses`.

In [None]:
# Build the base of df2 from df1 in one chain:
#   * .loc takes rows 'a'..'g' (label slices include both endpoints) and the
#     two columns in the requested order, age first;
#   * .iloc[::-1] flips the rows so they run 'g' -> 'a';
#   * .copy() gives df2 its own data, so adding columns below never touches df1.
df2 = df1.loc['a':'g', ['age', 'gender']].iloc[::-1].copy()

# Size the random columns from the frame itself rather than a hard-coded 7.
n_rows = len(df2)

# randint's upper bound is exclusive, so 10001 makes $10000 a possible value.
# dtype is pinned to int64 because NumPy's default integer is 32-bit on some
# platforms/versions.
df2['income'] = np.random.randint(0, 10001, size=n_rows, dtype='int64')
df2['expenses'] = np.random.randint(0, 10001, size=n_rows, dtype='int64')

# Profit is what is left of the income after expenses (may be negative).
df2['profit'] = df2['income'] - df2['expenses']

In [None]:
# Show df2: a bare expression on the last line of a cell is rendered as its output.
df2

Make sure your code passes the following `assert` statements:

In [None]:
# Checks for df2: rows run g -> a, columns are in the requested order, all
# three money columns are int64, and profit equals income minus expenses.
assert df2.index[0] == 'g'
assert df2.index[-1] == 'a'
assert list(df2.columns) == ['age', 'gender', 'income', 'expenses', 'profit']
# The original checked `expenses` twice and never checked `profit`.
for money_column in ('income', 'expenses', 'profit'):
    assert df2[money_column].dtype.name == 'int64'
assert (df2['profit'] + df2['expenses'] - df2['income'] == 0).all()

Using row filtering, column selection, and the `.mean()` method, calculate and print the average age for men and women in the `df1` `DataFrame`:

In [None]:
# Row filtering: a boolean mask per gender keeps only the matching rows of df1.
male = df1.loc[df1['gender'] == 'm']
female = df1.loc[df1['gender'] == 'f']

# Column selection, then the mean of each group; printed as (men, women).
m_ages, f_ages = male['age'], female['age']
print(m_ages.mean(), f_ages.mean())

Use the `iloc` indexer on `df1` to extract every other row and the last column. Save the resulting `Series` as `s4`:

In [None]:
# A single positional selection, as the exercise asks: rows 0, 2, 4, ...
# (step of 2) and the last column (position -1). Picking one column position
# yields a Series named after that column, and s4 is never bound to an
# intermediate DataFrame.
s4 = df1.iloc[::2, -1]

In [None]:
# s4 should hold every other row of df1 (a, c, e, g, i) from the cell_phone column.
expected_rows = ['a', 'c', 'e', 'g', 'i']
assert len(s4) == 5
assert list(s4.index) == expected_rows
assert s4.name == 'cell_phone'

Use the `.loc` indexer to extract all rows and just the `gender` and `age` columns. Save the resulting `DataFrame` as `df3`.

In [None]:
# Label-based selection with .loc, as the exercise asks: ':' keeps every row
# and the list of labels picks the columns by name, so the result does not
# depend on where those columns sit in df1.
df3 = df1.loc[:, ['gender', 'age']]

In [None]:
# Show df3: a bare expression on the last line of a cell is rendered as its output.
df3

In [None]:
# df3 must keep all ten rows (a-j) and only the gender and age columns, in that order.
assert df3.columns.tolist() == ['gender', 'age']
assert len(df3) == 10
assert df3.index.tolist() == list('abcdefghij')