# import pandas

In [183]:
import pandas as pd

### explain callable format

In [184]:
# Render floats with thousands separators and no decimals. The option expects a
# callable, so the bound `format` method of the template string is passed.
pd.set_option('display.float_format', '{:,.0f}'.format)

<blockquote>

`f` (defined in the next cell as `'{:,.0f}'.format`) is a method object bound to a string instance

the format method is bound to a `str('{:,.0f}')` object

when passed as an argument and called later, it remembers its `self` argument

the original `self` object can be retrieved via the `__self__` attribute
</blockquote>


In [185]:
# Looking up `format` on a string instance yields a bound method object.
f = '{:,.0f}'.format
# `__self__` is the instance the method is bound to; the `=` specifier renders
# the expression text followed by the repr of its value.
f'{f.__self__ = }'

"f.__self__ = '{:,.0f}'"

# import excel file

In [186]:
# Load the workbook into a DataFrame (path is relative to the notebook).
df = pd.read_excel(io='advworksdata.xlsx')
# Preview the first five rows.
df.head(n=5)

Unnamed: 0,productcategory,productsubcategory,product,saleterritory,Country,City,Sate,Customer,Employee,OrderCount,OrderDate,StandardCost,UnitPrice,UnitPriceDiscount,Discount,ListPrice,SaleswithStandard,NetSales,OrderQuantity,Sales
0,Clothing,Caps,AWC Logo Cap,United Kingdom,United Kingdom,Berks,England,Gary Suess,Amy Alberts,1,2012-06-30,7,5,0,1,9,97,-28,14,69
1,Accessories,Locks,Cable Lock,United Kingdom,United Kingdom,Berks,England,Gary Suess,Amy Alberts,1,2012-06-30,10,15,0,0,25,62,28,6,90
2,Clothing,Gloves,"Full-Finger Gloves, L",United Kingdom,United Kingdom,Berks,England,Gary Suess,Amy Alberts,1,2012-06-30,16,23,0,0,38,125,57,8,182
3,Clothing,Gloves,"Full-Finger Gloves, L",United Kingdom,United Kingdom,London,England,Linda Martin,Amy Alberts,1,2012-06-30,16,21,0,17,38,251,67,16,318
4,Clothing,Gloves,"Full-Finger Gloves, M",United Kingdom,United Kingdom,Berks,England,Gary Suess,Amy Alberts,1,2012-06-30,16,23,0,0,38,110,50,7,160


## check dataframe properties

### show dataframe type

In [187]:
# Show the class of the object returned by `pd.read_excel`.
type(df)

pandas.core.frame.DataFrame

### check if dataframe is empty

In [188]:
# `empty` is True when the frame has no rows or no columns.
df.empty

False

### show dataframe shape

> shape is returned as a tuple: (rows, columns)

In [189]:
# Tuple of (number of rows, number of columns).
df.shape

(60880, 20)

### get list of columns

In [190]:
# Index object holding the column labels as read from the workbook.
df.columns

Index(['productcategory', 'productsubcategory', 'product', 'saleterritory',
       'Country', 'City', 'Sate', 'Customer', 'Employee', 'OrderCount',
       'OrderDate', 'StandardCost', 'UnitPrice', 'UnitPriceDiscount',
       'Discount', 'ListPrice', 'SaleswithStandard', 'NetSales',
       'OrderQuantity', 'Sales'],
      dtype='object')

# work with dataframe

### rename dataframe columns

> provide a dictionary with the following structure:

    {'old_name': 'new_name'}

In [191]:
# Map original column labels (keys) to the shorter names used below (values).
# Labels that are not listed keep their current name.
df = df.rename(
    columns=dict(
        productcategory='category',
        saleterritory='territory',
        productsubcategory='subcategory',
        OrderQuantity='quantity',
        OrderDate='date',
        Sate='state',  # the source column label really is spelled "Sate"
    )
)
df.head()

Unnamed: 0,category,subcategory,product,territory,Country,City,state,Customer,Employee,OrderCount,date,StandardCost,UnitPrice,UnitPriceDiscount,Discount,ListPrice,SaleswithStandard,NetSales,quantity,Sales
0,Clothing,Caps,AWC Logo Cap,United Kingdom,United Kingdom,Berks,England,Gary Suess,Amy Alberts,1,2012-06-30,7,5,0,1,9,97,-28,14,69
1,Accessories,Locks,Cable Lock,United Kingdom,United Kingdom,Berks,England,Gary Suess,Amy Alberts,1,2012-06-30,10,15,0,0,25,62,28,6,90
2,Clothing,Gloves,"Full-Finger Gloves, L",United Kingdom,United Kingdom,Berks,England,Gary Suess,Amy Alberts,1,2012-06-30,16,23,0,0,38,125,57,8,182
3,Clothing,Gloves,"Full-Finger Gloves, L",United Kingdom,United Kingdom,London,England,Linda Martin,Amy Alberts,1,2012-06-30,16,21,0,17,38,251,67,16,318
4,Clothing,Gloves,"Full-Finger Gloves, M",United Kingdom,United Kingdom,Berks,England,Gary Suess,Amy Alberts,1,2012-06-30,16,23,0,0,38,110,50,7,160


## get subset of dataframe

### get one column

In [192]:
# Selecting a single label returns a Series; take its first five entries by
# position (equivalent to `df['product'].head()`).
df['product'].iloc[:5]


0             AWC Logo Cap
1               Cable Lock
2    Full-Finger Gloves, L
3    Full-Finger Gloves, L
4    Full-Finger Gloves, M
Name: product, dtype: object

### get more columns

In [193]:
# A list of labels selects several columns and returns a DataFrame;
# `head(3)` then keeps the first three rows.
df[['category', 'product']].head(3)

Unnamed: 0,category,product
0,Clothing,AWC Logo Cap
1,Accessories,Cable Lock
2,Clothing,"Full-Finger Gloves, L"


### limit columns of dataframe

In [194]:
# List the column labels again to pick the ones to keep in the next cell.
df.columns

Index(['category', 'subcategory', 'product', 'territory', 'Country', 'City',
       'state', 'Customer', 'Employee', 'OrderCount', 'date', 'StandardCost',
       'UnitPrice', 'UnitPriceDiscount', 'Discount', 'ListPrice',
       'SaleswithStandard', 'NetSales', 'quantity', 'Sales'],
      dtype='object')

In [195]:
# Keep only the columns used in the rest of the notebook.
# `.copy()` makes the subset an independent DataFrame, so the column
# assignments in later cells modify it directly instead of triggering
# pandas' SettingWithCopyWarning.
df = df['category subcategory product date ListPrice quantity Sales'.split()].copy()
df.head()

Unnamed: 0,category,subcategory,product,date,ListPrice,quantity,Sales
0,Clothing,Caps,AWC Logo Cap,2012-06-30,9,14,69
1,Accessories,Locks,Cable Lock,2012-06-30,25,6,90
2,Clothing,Gloves,"Full-Finger Gloves, L",2012-06-30,38,8,182
3,Clothing,Gloves,"Full-Finger Gloves, L",2012-06-30,38,16,318
4,Clothing,Gloves,"Full-Finger Gloves, M",2012-06-30,38,7,160


## work with dataframe data types

### show dataframe columns data types

In [196]:
# Series mapping each column label to its dtype.
df.dtypes

category               object
subcategory            object
product                object
date           datetime64[ns]
ListPrice             float64
quantity                int64
Sales                 float64
dtype: object

### convert column data type

In [197]:
# Parse the column with `pd.to_datetime` and store the result back under the
# same label, then preview the converted values.
df['date'] = df['date'].pipe(pd.to_datetime)
df['date'].head(n=5)

0   2012-06-30
1   2012-06-30
2   2012-06-30
3   2012-06-30
4   2012-06-30
Name: date, dtype: datetime64[ns]

### converting a column that contains NaNs (nulls) to int raises an error

In [198]:
# Demonstration: casting a float column that still contains NaN to int fails.
# IntCastingNaNError is a ValueError subclass; catching it shows the message
# while letting "Restart & Run All" continue past this cell.
try:
    df['ListPrice'] = df['ListPrice'].astype(int)
except ValueError as err:
    print(f'{type(err).__name__}: {err}')

IntCastingNaNError: Cannot convert non-finite values (NA or inf) to integer

## fixing NULL values

### check whether there are columns containing nulls (nans)

In [16]:
# Per column: True if at least one value is missing (`isna` is the same
# method as `isnull`).
df.isna().any()

category              True
subcategory           True
product               True
territory            False
Country              False
City                 False
state                False
Customer             False
Employee             False
OrderCount           False
date                 False
StandardCost          True
UnitPrice            False
UnitPriceDiscount    False
Discount             False
ListPrice             True
SaleswithStandard     True
NetSales              True
quantity             False
Sales                False
dtype: bool

### count null values per column

In [None]:
# Summing the boolean mask counts the missing values in each column.
df.isna().sum()

category       39
subcategory    39
product        39
date            0
ListPrice      39
quantity        0
Sales           0
dtype: int64

### fill column null values with custom values

In [None]:
# Label missing categories as 'unknown'.
# Avoid `df['category'].fillna('unknown', inplace=True)`: it fills a temporary
# selection, pandas 2.2 warns about it, and from pandas 3.0 it no longer
# updates `df`. Passing a {column: value} dict to `DataFrame.fillna` and
# assigning the result back fills the column once, in a single step.
df = df.fillna({'category': 'unknown'})
df[df['category'] == 'unknown'].head()

Unnamed: 0,category,subcategory,product,date,ListPrice,quantity,Sales
581,unknown,,,2014-02-28,,2,117
582,unknown,,,2014-02-28,,1,564
42825,unknown,,,2013-05-30,,2,117
42826,unknown,,,2013-05-30,,2,1128
42827,unknown,,,2013-05-30,,2,528


In [None]:
# Label missing subcategories as 'unknown'; assign the result back instead of
# using `inplace=True`, so re-running the cell never depends on mutated state.
df = df.fillna({'subcategory': 'unknown'})


In [None]:
# Fill the missing list prices with 0. `category` was already filled with
# 'unknown' above, so filling it again would do nothing, while `ListPrice`
# still holds the NaNs that block the integer conversion.
df = df.fillna({'ListPrice': 0})


## selecting values

In [141]:
# Distinct values of the column, in order of first appearance.
df['category'].unique()

array(['Clothing', 'Accessories', 'Components', 'Bikes', 'unknown'],
      dtype=object)

In [142]:
# Total of the Sales column.
df['Sales'].sum()

np.float64(80487704.17918801)

In [143]:
# Number of non-null entries in the quantity column.
df['quantity'].count()

np.int64(60880)

## slicing

### select records

<blockquote>
slice the dataframe records

[from-included:to-excluded]
</blockquote>

In [168]:
# Positional row slice: rows 10, 11 and 12 (the end position is excluded).
df.iloc[10:13]

Unnamed: 0,category,subcategory,product,date,ListPrice,quantity,Sales
10,Clothing,Gloves,"Half-Finger Gloves, S",2012-06-30,24,3,42
11,Components,Forks,HL Fork,2012-06-30,229,6,826
12,Components,Headsets,HL Headset,2012-06-30,125,2,150


### select columns and records

<blockquote>
pass a list of columns to filter columns

then slice the records of the resulting columns

[from-included:to-excluded]
</blockquote>

In [169]:
# First narrow to three columns, then take rows 10-12 by position.
df[['category', 'date', 'Sales']].iloc[10:13]

Unnamed: 0,category,date,Sales
10,Clothing,2012-06-30,42
11,Components,2012-06-30,826
12,Components,2012-06-30,150


### select records and column

In [26]:
# Rows first (positions 10-12), then the column; `.str.lower()` returns a new
# lower-cased Series and leaves `df` untouched.
df.iloc[10:13]['category'].str.lower()

10      clothing
11    components
12    components
Name: category, dtype: object

> same thing, just swapped selectors


In [170]:
# Column first, then positions 10-12 of that Series; the result matches the
# rows-then-column form.
df['category'].iloc[10:13].str.lower()

10      clothing
11    components
12    components
Name: category, dtype: object

In [200]:
# Chained assignment such as
#     df[10:13]['category'] = df[10:13]['category'].str.lower()
# writes to a temporary object; pandas warns about it and from pandas 3.0 it
# never updates `df`. Use `df.loc[row_indexer, "col"] = values` to assign in a
# single step.
#
# `.loc` slices by label and includes BOTH endpoints, so `df.loc[10:13]` would
# also change row 13. Turning the positional slice into labels keeps exactly
# the three rows (10, 11, 12) that `df[10:13]` selects.
rows = df.index[10:13]
df.loc[rows, 'category'] = df.loc[rows, 'category'].str.lower()
df.loc[rows, 'category']

10      clothing
11    components
12    components
Name: category, dtype: object

---

In [175]:
# First three rows of the frame.
df.head(3)

Unnamed: 0,category,subcategory,product,date,ListPrice,quantity,Sales
0,Clothing,Caps,AWC Logo Cap,2012-06-30,8,14,69
1,Accessories,Locks,Cable Lock,2012-06-30,25,6,90
2,Clothing,Gloves,"Full-Finger Gloves, L",2012-06-30,37,8,182


In [185]:
# A Series carries its column label in `.name`, so the label can be used to
# select the same column again. This first expression is evaluated but not
# displayed because it is not the last line of the cell.
df[df['category'].name]
# The label itself is a plain string.
type(df['category'].name)

str

In [187]:
# Transpose so columns become rows, then show the first three of them
# (i.e. the first three original columns across all records).
df.transpose().head(3)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,60870,60871,60872,60873,60874,60875,60876,60877,60878,60879
category,Clothing,Accessories,Clothing,Clothing,Clothing,Clothing,Clothing,Clothing,Clothing,Clothing,...,Bikes,Bikes,Bikes,Accessories,Clothing,Clothing,Clothing,Clothing,Clothing,Clothing
subcategory,Caps,Locks,Gloves,Gloves,Gloves,Gloves,Gloves,Gloves,Gloves,Gloves,...,Touring Bikes,Touring Bikes,Touring Bikes,Bottles and Cages,Shorts,Shorts,Shorts,Shorts,Shorts,Shorts
product,AWC Logo Cap,Cable Lock,"Full-Finger Gloves, L","Full-Finger Gloves, L","Full-Finger Gloves, M","Full-Finger Gloves, M","Full-Finger Gloves, S","Full-Finger Gloves, S","Half-Finger Gloves, L","Half-Finger Gloves, M",...,"Touring-3000 Yellow, 62","Touring-3000 Yellow, 62","Touring-3000 Yellow, 62",Water Bottle - 30 oz.,"Women's Mountain Shorts, L","Women's Mountain Shorts, L","Women's Mountain Shorts, L","Women's Mountain Shorts, M","Women's Mountain Shorts, S","Women's Mountain Shorts, S"


In [205]:
# A callable passed to `rename` is applied to every column label.
df = df.rename(str.lower, axis='columns')
# Show the first three rows with the columns in reverse alphabetical order.
df.head(3).sort_index(axis='columns', ascending=False)

Unnamed: 0,subcategory,sales,quantity,product,listprice,date,category
0,Caps,69,14,AWC Logo Cap,8,2012-06-30,Clothing
1,Locks,90,6,Cable Lock,25,2012-06-30,Accessories
2,Gloves,182,8,"Full-Finger Gloves, L",37,2012-06-30,Clothing
