# Import software libraries and load the datasets

In [None]:
import sys
import numpy as np
import pandas as pd

# Report the interpreter and library versions this notebook runs against.
print('Libraries used in this project:',
      '- Python {}'.format(sys.version),
      '- NumPy {}'.format(np.__version__),
      '- pandas {}'.format(pd.__version__),
      sep='\n')

# Read the three CSV files in order; in each one the first column becomes
# the DataFrame index.
stores_df, initial_df, ratings_df = (
    pd.read_csv(csv_path, index_col=0)
    for csv_path in (
        '/home/student/DSTIP/pandas/data/stores_data_reindex.csv',
        '/home/student/DSTIP/pandas/data/initial_invoices.csv',
        '/home/student/DSTIP/pandas/data/ratings_more.csv',
    )
)
print('\nLoaded datasets.')

# Display floats with a thousands separator and two decimal places.
pd.set_option('display.float_format', '{:,.2f}'.format)

# Append the initial invoices to `stores_df`

In [None]:
# Show the initial-invoices DataFrame (read from initial_invoices.csv) as
# the cell output.
initial_df

In [None]:
# Report the row count first so the effect of the append is visible.
print('Number of rows BEFORE append: {}.'.format(stores_df.shape[0]))

# Stack the initial invoices underneath the existing store rows.
# `DataFrame.append` was deprecated in pandas 1.4 and removed in 2.0, so
# `pd.concat` is used instead; it performs the same row-wise concatenation.
# Each frame keeps its own index labels, and `sort = False` leaves the
# columns unsorted if the two frames' columns are not aligned.
stores_df = pd.concat([stores_df, initial_df], sort = False)

print('Number of rows AFTER append: {}.'.format(stores_df.shape[0]))

In [None]:
# Show the last rows of `stores_df` (`tail()` returns 5 rows by default).
stores_df.tail()

# Merge the customer ratings into `stores_df`

In [None]:
# Show the customer-ratings DataFrame (read from ratings_more.csv) as the
# cell output.
ratings_df

In [None]:
# Row count before the join, for comparison with the count afterwards.
print('Number of rows BEFORE merge: {}.'.format(len(stores_df)))

# Left-join the ratings columns onto the store rows, matching on index
# labels; every existing store row is kept.
stores_df = stores_df.join(other=ratings_df, how='left')

print('Number of rows AFTER merge: {}.'.format(len(stores_df)))

In [None]:
# Show the last rows of `stores_df` (`tail()` returns 5 rows by default).
stores_df.tail()

# Sort the store data

In [None]:
# Reorder the rows of `stores_df` by index label, modifying the frame in
# place (rows are the default axis), then show it as the cell output.
stores_df.sort_index(inplace=True)
stores_df

In [None]:
# Rows ordered by city, then product line, then customer type, all
# ascending. This returns a sorted copy for display; `stores_df` itself is
# not modified.
stores_df.sort_values(['City', 'ProductLine', 'CustomerType'],
                      ascending=True)

In [None]:
# Rows ordered from the highest customer rating to the lowest. This returns
# a sorted copy for display; `stores_df` itself is not modified.
stores_df.sort_values('CustomerRating', ascending=False)

# Pivot the data so you can analyze it from different perspectives

In [None]:
# Mean quantity for each gender (rows) and product line (columns), rounded
# to two decimals. 'mean' is pivot_table's default aggregation, spelled out
# here for clarity.
stores_df.pivot_table(values='Quantity',
                      index='Gender',
                      columns='ProductLine',
                      aggfunc='mean').round(decimals=2)

In [None]:
# Total revenue for each product line (rows) in each city (columns).
# The aggregation is named by string: passing the NumPy callable `np.sum`
# emits a FutureWarning on pandas 2.1+, which recommends 'sum' to keep the
# same result.
stores_df.pivot_table(index = 'ProductLine',
                      columns = 'City',
                      values = 'Revenue',
                      aggfunc = 'sum')

# Use grouping to summarize categories of data

In [None]:
# Per-city totals of the numeric columns.
# `numeric_only = True` is required on pandas 2.0+, where it defaults to
# False: without it, text columns would be summed as well (string
# concatenation, or a TypeError for unsupported values) rather than being
# left out as they were in earlier versions.
stores_df.groupby('City').sum(numeric_only = True)

In [None]:
# Per-city totals restricted to the Revenue and COGS columns.
stores_df.groupby('City')[['Revenue', 'COGS']].agg('sum')

In [None]:
# Average customer rating for each gender.
stores_df.groupby('Gender')['CustomerRating'].agg('mean')