# Pandas basics

In [27]:
import pandas as pd
import numpy as np

In [6]:
# Load the salary table from a CSV file; the relative path is resolved against
# the current working directory. The outputs below show three rows with the
# columns Name, Salary and Age, and a default integer index.
df = pd.read_csv('salaries.csv')

In [7]:
# Show the whole frame. This is fine here because it has only three rows;
# prefer df.head() once the data gets larger.
df

Unnamed: 0,Name,Salary,Age
0,John,50000,34
1,Sally,120000,45
2,Alyssa,80000,27


In [8]:
# Pull out a single column by label; the result is a Series named 'Salary'.
df.loc[:, 'Salary']

0     50000
1    120000
2     80000
Name: Salary, dtype: int64

In [9]:
# Selecting with a list of labels keeps the result a DataFrame holding only
# those columns.
df.loc[:, ['Name', 'Salary']]

Unnamed: 0,Name,Salary
0,John,50000
1,Sally,120000
2,Alyssa,80000


In [10]:
# Smallest value in the Salary column.
df.loc[:, 'Salary'].min()

50000

In [11]:
# Largest value in the Salary column.
df.loc[:, 'Salary'].max()

120000

In [12]:
# Arithmetic mean of the Salary column (returned as a float).
df.loc[:, 'Salary'].mean()

83333.33333333333

### Conditional filtering

In [14]:
# Build a boolean mask: True for every row whose Age is strictly greater
# than 30. Series.gt is the method form of the `>` operator.
series_of_bool = df['Age'].gt(30)

In [15]:
# Inspect the mask: one True/False entry per row, indexed like df.
series_of_bool

0     True
1     True
2    False
Name: Age, dtype: bool

In [16]:
# Keep only the rows where the mask is True.
df.loc[series_of_bool]

Unnamed: 0,Name,Salary,Age
0,John,50000,34
1,Sally,120000,45


In [17]:
# Same filter in a single step: build the Age > 30 mask inline and use it to
# select rows, without keeping the mask in a variable.
df.loc[df['Age'].gt(30)]

Unnamed: 0,Name,Salary,Age
0,John,50000,34
1,Sally,120000,45


In [18]:
# Distinct Age values, in order of first appearance, as a NumPy array.
pd.unique(df['Age'])

array([34, 45, 27], dtype=int64)

In [19]:
# Number of distinct Age values. dropna=True is the default; it is spelled
# out so that the exclusion of missing values from the count is visible.
df['Age'].nunique(dropna=True)

3

In [21]:
# Keep a reference to the column labels (a pandas Index object).
df_cols = df.columns

In [22]:
# Show the column labels captured in df_cols.
df_cols

Index(['Name', 'Salary', 'Age'], dtype='object')

In [23]:
# Print a structural summary: index range, per-column non-null counts and
# dtypes, and approximate memory usage. info() writes straight to stdout.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3 entries, 0 to 2
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Name    3 non-null      object
 1   Salary  3 non-null      int64 
 2   Age     3 non-null      int64 
dtypes: int64(2), object(1)
memory usage: 200.0+ bytes


In [24]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric
# columns; the non-numeric Name column is left out of the result.
df.describe()

Unnamed: 0,Salary,Age
count,3.0,3.0
mean,83333.333333,35.333333
std,35118.845843,9.073772
min,50000.0,27.0
25%,65000.0,30.5
50%,80000.0,34.0
75%,100000.0,39.5
max,120000.0,45.0


In [26]:
# Row labels of the frame; here a default RangeIndex running 0..2.
df.index

RangeIndex(start=0, stop=3, step=1)

### Creating a new DataFrame with NumPy and pandas

In [29]:
# 5x10 integer matrix holding 0..49, filled row by row.
mat = np.arange(50).reshape((5, 10))

In [30]:
# Wrap the matrix in a DataFrame. With no labels supplied, rows and columns
# both get default integer labels.
new_df = pd.DataFrame(mat)

In [31]:
# Display the frame: columns are labelled 0..9 and rows 0..4 by default.
new_df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,0,1,2,3,4,5,6,7,8,9
1,10,11,12,13,14,15,16,17,18,19
2,20,21,22,23,24,25,26,27,28,29
3,30,31,32,33,34,35,36,37,38,39
4,40,41,42,43,44,45,46,47,48,49


In [32]:
# 5x2 integer matrix holding 0..9, filled row by row.
mat_1 = np.arange(10).reshape((5, 2))

In [33]:
# Build a frame from the 5x2 matrix and name its two columns; rows keep the
# default integer index.
newer_df = pd.DataFrame(
    mat_1,
    columns=['A', 'B'],
)

In [34]:
# Display the frame: columns are now labelled A and B, rows are still 0..4.
newer_df

Unnamed: 0,A,B
0,0,1
1,2,3
2,4,5
3,6,7
4,8,9


In [35]:
# Rebuild the frame with explicit row labels as well as column names. This
# rebinds newer_df, replacing the version with the default integer index.
newer_df = pd.DataFrame(
    mat_1,
    index=['zero', 'one', 'two', 'three', 'four'],
    columns=['A', 'B'],
)

In [36]:
# Display the frame: rows are now labelled with the strings passed as index.
newer_df

Unnamed: 0,A,B
zero,0,1
one,2,3
two,4,5
three,6,7
four,8,9
