## Pandas Series and DataFrame

In [3]:
import random as rnd

import numpy as np
import pandas as pd

# Number of students per program, keyed by program name.
students_data = {"Business": 25, "AI": 30, "JS": 30, "JAVA": 27}
students_data

{'Business': 25, 'AI': 30, 'JS': 30, 'JAVA': 27}

In [4]:
# A dict becomes a Series whose index labels are the dict keys.
series_program = pd.Series(students_data)
series_program

Business    25
AI          30
JS          30
JAVA        27
dtype: int64

In [5]:
# print() gives the same plain-text rendering as the bare expression in the previous cell.
print(series_program)

Business    25
AI          30
JS          30
JAVA        27
dtype: int64


In [7]:
# Positional access with .iloc: first and last element, independent of the index labels.
series_program.iloc[0], series_program.iloc[-1]

(np.int64(25), np.int64(27))

In [8]:
# keys() mirrors the dict API and returns the Series' index labels.
series_program.keys()

Index(['Business', 'AI', 'JS', 'JAVA'], dtype='object')

In [10]:
# Label-based lookup, written the same way as indexing a dict by key.
series_program["AI"]

np.int64(30)

In [11]:
# print() shows the plain value (30) rather than the repr np.int64(30) of the previous cell.
print(series_program["AI"])

30


In [12]:
# The looked-up value is a NumPy integer scalar, so ordinary arithmetic works on it.
series_program["AI"] + 50

np.int64(80)

In [21]:
# Second way to build a Series: from a plain list (the Series above came from a dict).
# `rnd` is the standard-library `random` module, imported with the other imports
# at the top of the notebook.
rnd.seed(42)  # fixed seed so the rolls are identical on every run

# Five rolls of a six-sided die; randint includes both endpoints.
dice_list = [rnd.randint(1, 6) for _ in range(5)]
dice_list

[6, 1, 1, 6, 3]

In [22]:
# A list carries no labels, so the Series gets a default integer index (0, 1, 2, ...).
dice_series = pd.Series(dice_list)
dice_series

0    6
1    1
2    1
3    6
4    3
dtype: int64

In [24]:
# Built-in reductions over the values: smallest, largest, and arithmetic mean.
dice_series.min(), dice_series.max(), dice_series.mean()

(np.int64(1), np.int64(6), np.float64(3.4))

## DataFrame
* analogous to a 2D NumPy array, but with flexible row indices and column names

In [25]:
# Show the Series again before it is turned into a DataFrame.
series_program

Business    25
AI          30
JS          30
JAVA        27
dtype: int64

In [28]:
# One-column DataFrame: the Series' labels become the row index and
# `columns` supplies the name of the single column.
df_programs = pd.DataFrame(series_program, columns=["Num Students"])
df_programs

Unnamed: 0,Num Students
Business,25
AI,30
JS,30
JAVA,27


In [29]:
# Build two Series from dictionaries; both use the same program labels as their index.
students = pd.Series({"AI": 35, "NET": 30, "APP": 40, "Java": 27})
language = pd.Series({"AI": "Python", "NET": "C#", "APP": "Kotlin", "Java": "Java"})

students

AI      35
NET     30
APP     40
Java    27
dtype: int64

In [30]:
# Display the second Series (the previous cell only showed `students`).
language

AI      Python
NET         C#
APP     Kotlin
Java      Java
dtype: object

In [35]:
# Combine the two Series into one frame: each keyword becomes a column name
# and the Series' shared labels become the row index.
df_programs = pd.DataFrame(dict(Students=students, Language=language))
df_programs

Unnamed: 0,Students,Language
AI,35,Python
NET,30,C#
APP,40,Kotlin
Java,27,Java


In [39]:
# Alternative construction: column data given as a NumPy array and a plain list,
# with the row labels passed explicitly through `index`.
# `np` is imported with the other imports at the top of the notebook.
df_from_arrays = pd.DataFrame(
    {
        "Students": np.array((25, 30, 30, 27)),
        "Language": ["Python", "Csharp", "Kotlin", "Java"],
    },
    index=["AI", ".NET", "APP", "Java"],
)
df_from_arrays

Unnamed: 0,Students,Language
AI,25,Python
.NET,30,Csharp
APP,30,Kotlin
Java,27,Java


In [40]:
# The row labels are the index labels of the two Series the frame was built from.
df_programs.index

Index(['AI', 'NET', 'APP', 'Java'], dtype='object')

## Data Selection

In [41]:
# Selecting a single column by name returns it as a Series.
df_programs["Students"]

AI      35
NET     30
APP     40
Java    27
Name: Students, dtype: int64

In [43]:
# A list of column names returns a DataFrame with the columns in the requested order.
df_programs[["Language", "Students"]]

Unnamed: 0,Language,Students
AI,Python,35
NET,C#,30
APP,Kotlin,40
Java,Java,27


In [44]:
# Attribute access returns the same column as df_programs["Students"].
# Bracket notation is the safer habit: it also works for column names that are
# not valid Python identifiers or that collide with DataFrame attributes.
df_programs.Students

AI      35
NET     30
APP     40
Java    27
Name: Students, dtype: int64

In [46]:
# Single-cell lookup by row label and column name in one .loc call.
# This replaces chained indexing (df["Language"]["NET"]), which needs two
# separate lookups and is unreliable if the same pattern is later used for assignment.
df_programs.loc["NET", "Language"]

'C#'

## Indexers

In [48]:
# .loc selects a row by its index label; the row comes back as a Series
# whose index is the column names.
df_programs.loc["Java"]

Students      27
Language    Java
Name: Java, dtype: object

In [49]:
# Same label-based row lookup, for a different label.
df_programs.loc["AI"]

Students        35
Language    Python
Name: AI, dtype: object

In [51]:
# Unlike a dict, a DataFrame can be sliced by label because its index is ordered.
# A .loc label slice includes the end label: "NET" is part of the result.
df_programs.loc["AI":"NET"]

Unnamed: 0,Students,Language
AI,35,Python
NET,30,C#


In [53]:
# .iloc slices by integer position; the end position is excluded,
# so this returns the rows at positions 1 and 2.
df_programs.iloc[1:3]

Unnamed: 0,Students,Language
NET,30,C#
APP,40,Kotlin


## Masking

In [57]:
# Element-wise comparison yields a boolean Series (the mask), one entry per row.
df_programs["Students"] > 28

AI       True
NET      True
APP      True
Java    False
Name: Students, dtype: bool

In [58]:
# Indexing with the boolean mask keeps only the rows where the mask is True.
df_programs[df_programs["Students"] > 28]

Unnamed: 0,Students,Language
AI,35,Python
NET,30,C#
APP,40,Kotlin


In [61]:
# query() expresses the same filter as a string; the result matches the boolean-mask version.
df_programs.query("Students > 28")

Unnamed: 0,Students,Language
AI,35,Python
NET,30,C#
APP,40,Kotlin
