# PANDAS #2 - TUTORIAL
https://dev.to/gtrindadi/pandas-2-reading-files-and-basic-dataframe-operations-38l2
<br> by: Gabriela Trindade

In [None]:
import pandas as pd

# Downloading file

In [None]:
!wget https://raw.githubusercontent.com/gabrielatrindade/blog-posts-pandas-series/master/restaurant_orders.csv

# Reading a file

In [None]:
# reading without `names`: pandas uses the first line of the file as the header
pd.read_csv('restaurant_orders.csv', sep=',')

In [None]:
# naming the columns explicitly; with `names` given, every line of the file
# is read as data
column_names = [
    'order_number',
    'order_date',
    'item_name',
    'quantity',
    'product_price',
]

pd.read_csv('restaurant_orders.csv', sep=',', names=column_names)

In [None]:
# keeping the dataframe in a variable so that later cells can reuse it
orders = pd.read_csv(
    'restaurant_orders.csv',
    sep=',',
    names=column_names,
)

In [None]:
# displaying the dataframe; the notebook renders the last expression of a cell
orders

In [None]:
# checking which type read_csv returned
type(orders)

# Basic DataFrame operations

## Print samples

In [None]:
# showing the first rows (5 is also what head() uses by default)
orders.head(n=5)

In [None]:
# showing the last rows (5 is also what tail() uses by default)
orders.tail(n=5)

In [None]:
# showing 5 randomly chosen rows
orders.sample(n=5)

## Select specific rows and columns

In [None]:
# selecting the rows at positions 0 through 6 (the slice end, 7, is excluded)
orders[0:7]

In [None]:
# selecting the rows at positions 0 through 5; an omitted slice start means 0
orders[:6]

In [None]:
# selecting a column as a DataFrame: a list of column names keeps the result
# two-dimensional
orders[['item_name']]

In [None]:
# selecting a column as a Series: a single column name (not a list)
orders['item_name']

In [None]:
# alternative way to select a column as a Series: attribute access
# (only works when the column name is a valid Python identifier)
orders.item_name

In [None]:
# selecting one column as a DataFrame, then slicing its first 6 rows
orders[['item_name']][:6]

In [None]:
# selecting by position with .iloc: rows 0 through 6 (the end, 7, is excluded)
orders.iloc[0:7]

In [None]:
# all rows, columns at positions 2 and 3 (item_name and quantity)
orders.iloc[:, 2:4]

In [None]:
# the last row; a single integer position returns it as a Series
orders.iloc[-1]

In [None]:
# selecting by label with .loc: unlike .iloc and plain slicing, the end label
# is included, so this returns the rows labelled 0 through 7
orders.loc[0:7, ['order_number', 'item_name']]

In [None]:
# the rows labelled 1, 3 and 5, restricted to two columns
orders.loc[[1, 3, 5], ['order_number', 'item_name']]

## Filter by specific values

In [None]:
# keeping only the rows whose order_number equals 16118
orders[orders.order_number == 16118]

In [None]:
# keeping the rows from either date; each comparison needs its own parentheses
# because | binds more tightly than ==
orders[(orders.order_date == '2019-08-03') |
       (orders.order_date == '2019-08-02')]

## Add rows and columns

In [None]:
# working on an independent copy so that `orders` itself stays untouched
orders_copy = orders.copy(deep=True)

In [None]:
# previewing the copy before modifying it
orders_copy.head()

In [None]:
# adding a row through pd.concat
# (DataFrame.append was deprecated in pandas 1.4 and removed in pandas 2.0,
# so the new row is built as a one-row dataframe and concatenated instead)
orders_copy = pd.concat(
    [orders_copy,
     pd.DataFrame([[123456, '2019-08-04', 'Product Test', 4, 1.00]],
                  columns=orders_copy.columns)],
    ignore_index=True)

In [None]:
# checking the last rows, where the new row was added
orders_copy.tail()

In [None]:
# adding a row using a list: assigning to a label that does not exist yet
# creates the row
# NOTE(review): len(orders_copy) is a fresh label only while the index is the
# default 0..n-1 range -- confirm if the index is ever changed
row = [12134567, '2019-08-04', 'Product Test', 4, 1.70]
orders_copy.loc[len(orders_copy)] = row

In [None]:
# checking the last rows again for the second added row
orders_copy.tail()

In [None]:
# adding a column with a default value: the scalar is repeated for every row
orders_copy['discount_pct'] = 10

In [None]:
# checking that the discount_pct column was added
orders_copy.head()

In [None]:
# adding a column computed from other columns
# (despite its name, discount_price holds the amount taken off the price:
# product_price * discount_pct / 100)
orders_copy['discount_price'] = (
    orders_copy['product_price']
    .mul(orders_copy['discount_pct'])
    .div(100)
)

In [None]:
# checking the computed discount_price column
orders_copy.head()

In [None]:
# adding a column through assign function
# (a callable passed to assign receives the whole dataframe, not a single row)
orders_copy = orders_copy.assign(
    discount_subtotal=lambda df: df['quantity'] * df['discount_price'],
)

In [None]:
# checking the new discount_subtotal column
orders_copy.head()

In [None]:
# adding a column through apply function
# (axis=1 makes apply call the lambda once per row)
orders_copy['subtotal'] = orders_copy.apply(
    lambda order: order['quantity'] * (
        order['product_price'] - order['discount_price']
    ),
    axis=1,
)

In [None]:
# checking the new subtotal column
orders_copy.head()

## Deleting rows and columns

In [None]:
# locating the 'Product Test' rows added earlier, to read off their index labels
orders_copy[orders_copy.item_name == 'Product Test']

In [None]:
# deleting a row: look up the label stored at position 74818, then drop it
# NOTE(review): 74818 is hard-coded; presumably the position of the first
# 'Product Test' row -- confirm against the size of the dataset
orders_copy = orders_copy.drop(index=orders_copy.index[74818])

In [None]:
# confirming the deletion: no row should carry the label 74818 any more
orders_copy[orders_copy.index == 74818]

In [None]:
# deleting columns by name
orders_copy = orders_copy.drop(
    columns=[
        'discount_pct',
        'discount_price',
        'discount_subtotal',
        'subtotal',
    ],
)

In [None]:
# checking that the discount and subtotal columns are gone
orders_copy.head()

## Linear logic (chaining operations)

In [None]:
# chaining left to right: filter the rows by date, pick one column, then take
# the first 3 values
orders[orders.order_date == '2019-08-03']['item_name'].head(3)

In [None]:
# picking two columns, then taking the last 3 rows
orders[['item_name','quantity']].tail(3)

In [None]:
# keeping the rows that match both conditions, then taking the last 5;
# each comparison needs its own parentheses because & binds more tightly than ==
orders[(orders.order_date == '2019-08-03') &
       (orders.item_name == 'Plain Papadum')].tail()