# Day 12: Pandas

- https://pandas.pydata.org/
- https://pandas.pydata.org/docs/getting_started/index.html

In [2]:
import pandas as pd
import numpy as np

### Example Questions

Q1. Create a Pandas Series from a Python list and perform basic operations like filtering, sorting, etc.

In [8]:
nums = [4, 8, 9, 2, 5, 7]
ser_nums = pd.Series(nums)
print(f"Series: \n{ser_nums}")
print(f"Filter by index:\n{ser_nums.filter(items= [0, 2, 4, 6])}")
print(f"Sort by values:\n{ser_nums.sort_values()}")
print(f"Power of 2:\n{ser_nums.pow(2)}")

Series: 
0    4
1    8
2    9
3    2
4    5
5    7
dtype: int64
Filter by index:
0    4
2    9
4    5
dtype: int64
Sort by values:
3    2
0    4
4    5
5    7
1    8
2    9
dtype: int64
Power of 2:
0    16
1    64
2    81
3     4
4    25
5    49
dtype: int64


Q2. Read a CSV file into a Pandas DataFrame and perform basic data manipulation operations.

In [9]:
df = pd.read_csv("data/day_12/employees.csv")
df.info()
df.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20 entries, 0 to 19
Data columns (total 5 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   Name        20 non-null     object
 1   Age         20 non-null     int64 
 2   Department  20 non-null     object
 3   Position    20 non-null     object
 4   Salary      20 non-null     int64 
dtypes: int64(2), object(3)
memory usage: 932.0+ bytes


Unnamed: 0,Name,Age,Department,Position,Salary
0,John Doe,29,Engineering,Software Engineer,75000
1,Jane Smith,34,Marketing,Marketing Manager,68000
2,Alice Johnson,28,HR,HR Specialist,52000
3,Bob Brown,45,Finance,Accountant,60000
4,Charlie Davis,31,Engineering,DevOps Engineer,80000


In [None]:
df["Name"].groupby(df["Department"]).count()

Department
Engineering    6
Finance        4
HR             3
Marketing      4
Sales          3
Name: Name, dtype: int64

In [15]:
df.isnull().sum()

Name          0
Age           0
Department    0
Position      0
Salary        0
dtype: int64

Q3. Create a Pandas DataFrame from a dictionary and perform filtering and grouping operations.

In [18]:
employee_details = [
    {"Name": "John Doe", "Age": 29, "Department": "Engineering", "Position": "Software Engineer", "Salary": 75000},
    {"Name": "Jane Smith", "Age": 34, "Department": "Marketing", "Position": "Marketing Manager", "Salary": 68000},
    {"Name": "Alice Johnson", "Age": 28, "Department": "HR", "Position": "HR Specialist", "Salary": 52000},
    {"Name": "Bob Brown", "Age": 45, "Department": "Finance", "Position": "Accountant", "Salary": 60000},
    {"Name": "Charlie Davis", "Age": 31, "Department": "Engineering", "Position": "DevOps Engineer", "Salary": 80000},
    {"Name": "Emily White", "Age": 26, "Department": "Sales", "Position": "Sales Representative", "Salary": 45000},
    {"Name": "Frank Harris", "Age": 39, "Department": "Engineering", "Position": "Team Lead", "Salary": 90000},
    {"Name": "Grace Lee", "Age": 30, "Department": "Marketing", "Position": "Content Writer", "Salary": 50000},
]

emp_df = pd.DataFrame.from_dict(employee_details)
emp_df

Unnamed: 0,Name,Age,Department,Position,Salary
0,John Doe,29,Engineering,Software Engineer,75000
1,Jane Smith,34,Marketing,Marketing Manager,68000
2,Alice Johnson,28,HR,HR Specialist,52000
3,Bob Brown,45,Finance,Accountant,60000
4,Charlie Davis,31,Engineering,DevOps Engineer,80000
5,Emily White,26,Sales,Sales Representative,45000
6,Frank Harris,39,Engineering,Team Lead,90000
7,Grace Lee,30,Marketing,Content Writer,50000


In [None]:
df.filter

[1;31mSignature:[0m
[0mdf[0m[1;33m.[0m[0mfilter[0m[1;33m([0m[1;33m
[0m    [0mitems[0m[1;33m=[0m[1;32mNone[0m[1;33m,[0m[1;33m
[0m    [0mlike[0m[1;33m:[0m [1;34m'str | None'[0m [1;33m=[0m [1;32mNone[0m[1;33m,[0m[1;33m
[0m    [0mregex[0m[1;33m:[0m [1;34m'str | None'[0m [1;33m=[0m [1;32mNone[0m[1;33m,[0m[1;33m
[0m    [0maxis[0m[1;33m:[0m [1;34m'Axis | None'[0m [1;33m=[0m [1;32mNone[0m[1;33m,[0m[1;33m
[0m[1;33m)[0m [1;33m->[0m [1;34m'Self'[0m[1;33m[0m[1;33m[0m[0m
[1;31mDocstring:[0m
Subset the dataframe rows or columns according to the specified index labels.

Note that this routine does not filter a dataframe on its
contents. The filter is applied to the labels of the index.

Parameters
----------
items : list-like
    Keep labels from axis which are in items.
like : str
    Keep labels from axis for which "like in label == True".
regex : str (regular expression)
    Keep labels from axis for which re.search(regex