# Notebook 2 - Pandas DataFrames

<img src="https://raw.githubusercontent.com/fralfaro/DS-Cheat-Sheets/main/docs/examples/pandas/pandas.png" alt="pandas logo" width="300">


## Import Pandas

In [None]:
# Import Pandas and numpy
import pandas as pd
import numpy as np

**DataFrame**

<img src="https://raw.githubusercontent.com/fralfaro/DS-Cheat-Sheets/main/docs/examples/pandas/df.png" alt="pandas DataFrame diagram">

A DataFrame is a **two-dimensional** labeled data structure with columns of potentially different types.

## **Axis attribute**

<img src="https://i.stack.imgur.com/dcoE3.jpg" alt="DataFrame axes: axis 0 and axis 1">


A DataFrame has two axes: the first runs vertically downwards across rows (axis 0), and the second runs horizontally across columns (axis 1).

In [None]:
# Assemble the country table from a column-name -> values mapping
data = dict(
    Country=['Belgium', 'India', 'Brazil'],
    Capital=['Brussels', 'New Delhi', 'Brasília'],
    Population=[11190846, 1303171035, 207847528],
)
# The column order is taken from the mapping's own key order
df = pd.DataFrame(data, columns=list(data))

# Label the output, then let the notebook render 'df' as the cell result
print("\ndf:")
df

## Getting Elements


In [None]:
# Attribute-style access: returns the 'Country' column as a Series
df.Country

In [None]:
# Bracket access with a single label: also returns the column as a Series
df["Country"]

In [None]:
# A list of labels (even a single one) returns a DataFrame rather than a Series
df[["Country"]]

In [None]:
# Select several columns at once; the result is a DataFrame
df[["Country", "Capital"]]

In [None]:
# Get subset of a DataFrame: slicing with [] selects rows,
# here every row from position 1 onwards
df[1:]

## Loc vs iloc

<img src="https://miro.medium.com/v2/resize:fit:1400/1*I2PXtq9CHadRlLuM4numUw.png" alt="loc vs iloc comparison" width="500">

## iloc

In [None]:
# Rows at positions 0 and 1 (the slice end is excluded);
# the list [0] keeps column 0 as a one-column DataFrame
df.iloc[0:2,[0]]

In [None]:
# List selectors on both axes: the cell at row 0 / column 0, kept as a 1x1 DataFrame
df.iloc[[0],[0]]

In [None]:
# Positional slices on both axes: first two rows and first two columns
df.iloc[0:2,0:2]

## loc

In [None]:
# Label-based selection: row labelled 0, column 'Country', returned as a DataFrame
df.loc[[0], ['Country']]

In [None]:
# A single row label returns that row (label 2) as a Series
df.loc[2]

In [None]:
# Label slices in .loc include BOTH endpoints,
# so this returns 'Capital' for the rows labelled 0, 1 and 2
df.loc[0:2,'Capital']

In [None]:
# Positional counterpart: .iloc excludes the slice end,
# so only positions 0 and 1 of column 1 ('Capital') are returned
df.iloc[0:2, 1]

## Filtering

In [None]:
# Sample data for the filtering examples.
# A seeded Generator replaces the unseeded legacy np.random calls, so every run
# produces the same 'val' / 'val2' columns and the filter results are reproducible.
rng = np.random.default_rng(seed=42)
df = pd.DataFrame({
    'name': ['Jane', 'John', 'Ashley', 'Mike', 'Emily', 'Jack', 'Catlin'],
    'ctg': ['A', 'A', 'C', 'B', 'B', 'C', 'B'],
    # 7 floats drawn from [0, 1), rounded to two decimals
    'val': rng.random(7).round(2),
    # 7 integers from 1 to 9 (the upper bound 10 is exclusive)
    'val2': rng.integers(1, 10, size=7),
})

In [None]:
# Display the sample DataFrame used by the filtering examples
df

### 1. Logical Operators

In [None]:
# Boolean mask: keep the rows whose 'val' is greater than 0.5
df[df.val > 0.5]

In [None]:
# Comparison operators also work on strings: rows whose name sorts after 'Jane'
df[df.name > 'Jane']

### 2. Multiple Logical Operators

In [None]:
# Combine conditions with & (element-wise AND); each condition needs its own parentheses
df[(df.val > 0.5) & (df.val2 == 7)]

In [None]:
# Combine conditions with | (element-wise OR): a row is kept if either condition holds
df[(df.val < 0.5) | (df.val2 == 7)]

### 3. Isin

In [None]:
# Keep the rows whose 'name' is one of the listed values
names = ['John','Catlin','Mike']
df[df.name.isin(names)]

### 4. Str accessor

In [None]:
# The .str accessor exposes string methods: rows whose name starts with 'J'
df[df.name.str.startswith('J')]

### 5. Tilde (~)

In [None]:
# ~ inverts the boolean mask: rows whose name does NOT contain 'y'
df[~df.name.str.contains('y')]

### 6. Query

In [None]:
# Express the filter as a string: category "B" and val above 0.5
df.query('ctg == "B" and val > 0.5')

## Indexing

In [None]:
# Returns a new DataFrame indexed by 'name' (df itself is not modified);
# drop=False also keeps 'name' as a regular column
df.set_index("name", drop=False)

In [None]:
# inplace=True modifies df itself: 'name' becomes the index
# and is removed from the regular columns
df.set_index("name", inplace=True)
df

In [None]:
# Move the index back into a regular column and restore a default integer index
# (returns a new DataFrame)
df.reset_index(drop=False)

In [None]:
# drop=True discards the current index instead of turning it into a column
df.reset_index(drop=True)

## Dropping


In [None]:
# Rebuild the country table for the dropping and ranking examples
data = {
    'Country': ['Belgium', 'India', 'Brazil'],
    'Capital': ['Brussels', 'New Delhi', 'Brasília'],
    'Population': [11190846, 1303171035, 207847528],
}
# Columns follow the key order of the mapping
df = pd.DataFrame(data=data, columns=[*data])

# Label the output, then let the notebook render 'df' as the cell result
print("\ndf:")
df

In [None]:
# Drop values from columns (axis=1): returns a new DataFrame without 'Country'
df.drop('Country', axis=1)

In [None]:
# Drop values from rows (axis=0): returns a new DataFrame without the row labelled 0
df.drop(0, axis=0)

## Sorting


In [None]:
# Load the CSV used by the sorting examples
# (relative path, resolved against the current working directory)
df2 =pd.read_csv("Data/vgsalesGlobale.csv")

In [None]:
# Display the loaded DataFrame
df2

In [None]:
# Sort by labels along an axis: rows ordered by index label, descending
df2.sort_index(ascending= False)

In [None]:
# Sort by labels along an axis: axis=1 orders the columns by their names
df2.sort_index(axis = 1)

In [None]:
# Sort by the values along an axis: rows ordered by the "Name" column
df2.sort_values("Name")

In [None]:
# Sort rows by the "Year" column (ascending by default)
df2.sort_values("Year")

In [None]:
# Keep two columns, then sort by "Genre" and break ties with "Year"
df2[["Genre","Year"]].sort_values(["Genre","Year"], ascending=True)

## Ranking

In [None]:
# Display df (the country table rebuilt in the Dropping section)
df

In [None]:
# Assign ranks to entries: ranks are computed within each column,
# and the smallest value receives rank 1
df.rank(ascending=True)

In [None]:
# Assign ranks to entries: ascending=False gives rank 1 to the largest value in each column
df.rank(ascending=False)

## Applying Functions


In [None]:
# 4x3 frame of standard-normal samples: states as row labels, 'b'/'d'/'e' as columns
df3 = pd.DataFrame(
    data=np.random.randn(4, 3),
    index=['Utah', 'Ohio', 'Texas', 'Oregon'],
    columns=['b', 'd', 'e'],
)

In [None]:
# Display the random DataFrame used by the apply / applymap examples
df3

In [None]:
# Define a function (lambda form): the spread of x, i.e. its maximum minus its minimum
f = lambda x: x.max() - x.min()

In [None]:
# Define a function (def form)
def f(x):
    """Return the spread of ``x``: its maximum minus its minimum."""
    highest = x.max()
    lowest = x.min()
    return highest - lowest

In [None]:
# Helper that doubles whatever it receives
f2 = lambda x : x*2

### 1- (apply) works on a row / column basis of a DataFrame

In [None]:
# Apply function to DataFrame: axis=0 passes each column to f, giving one value per column
df3.apply(f, axis = 0)

In [None]:
# Apply function to DataFrame: axis=1 passes each row to f, giving one value per row
df3.apply(f, axis = 1)

In [None]:
# Apply function to DataFrame: f2 receives each row and returns it doubled
df3.apply(f2, axis = 1)

### 2- (applymap, renamed `DataFrame.map` in pandas 2.1) works element-wise on a DataFrame

In [None]:
# Doubling helper (same definition as in the apply examples above)
f2 = lambda x : x*2

In [None]:
# Apply function element-wise to every cell of df3.
# DataFrame.applymap was deprecated in pandas 2.1 and renamed DataFrame.map;
# use the new name when it exists and fall back to applymap on older pandas.
df3.map(f2) if hasattr(pd.DataFrame, "map") else df3.applymap(f2)

### 3- (map) works element-wise on a Series

In [None]:
# Build a labelled Series from a label -> value mapping
s = pd.Series({'a': 3, 'b': -5, 'c': 7, 'd': 4})

# Label the output, then let the notebook render the Series as the cell result
print("s:")
s

In [None]:
# Doubling function to map over the Series
f3 = lambda x : x*2

In [None]:
# map function element-wise on series: f3 is applied to each value of s
s.map(f3)

## Arithmetic Operations

In [None]:
# df1: 2x3 grid of 0..5, rows Tim/Tom, columns A/B/C
df1 = pd.DataFrame(
    data=np.arange(6).reshape(2, 3),
    index=["Tim", "Tom"],
    columns=["A", "B", "C"],
)
# df2: 3x3 grid of 0..8, rows Tim/Kate/Tom, columns A/C/D
# (only partly overlapping with df1 on both axes)
df2 = pd.DataFrame(
    data=np.arange(9).reshape(3, 3),
    index=["Tim", "Kate", "Tom"],
    columns=["A", "C", "D"],
)

In [None]:
# Show both frames; they share only columns A and C and rows Tim and Tom
print(df1)
print(df2)

In [None]:
# + aligns on row and column labels; positions not present in both frames become NaN
df1+df2

In [None]:
# add() with fill_value=0 treats a value missing from one frame as 0
# (the result is still NaN where the value is missing from both frames)
df1.add(df2,fill_value=0)

In [None]:
# Scalar / DataFrame is element-wise; the 0 in df1 becomes inf
1/df1

In [None]:
# Scalar multiplication is applied to every element
df1*3

## DataFrame and Series operations

In [None]:
# Show df2 again for reference
print(df2)

In [None]:
# Take the row at position 1 of df2 as a Series (its index is df2's column labels)
s=df2.iloc[1]
s

In [None]:
# DataFrame - Series matches the Series index against the columns
# and subtracts the Series from every row
df2-s

In [None]:
# Take column "A" as a Series (its index is df2's row labels)
s2=df2["A"]
s2

In [None]:
# axis=0 matches the Series against the row index, subtracting it from each column
df2.sub(s2,axis=0)