# Import pandas 

In [1]:
import os
from pathlib import Path

import pandas as pd

# Creating data

In [2]:
# Build a small frame from a dict: each key is a column label and each list
# holds that column's values; rows get the default 0, 1 integer index.
pd.DataFrame(
    data={
        'Yes': [50, 21],
        'No': [131, 2],
    }
)

Unnamed: 0,Yes,No
0,50,131
1,21,2


In [3]:
# Columns can hold strings as well: one column of review text per reviewer.
pd.DataFrame(
    data={
        'Prianka': ['I liked it.', 'It was awful.'],
        'Pritika': ['Pretty good.', 'Bland.'],
    }
)

Unnamed: 0,Prianka,Pritika
0,I liked it.,Pretty good.
1,It was awful.,Bland.


In [4]:
# Same review table, but with explicit row labels supplied through `index`
# instead of the default 0, 1 integer labels.
pd.DataFrame(
    data={
        'Prianka': ['I liked it.', 'It was awful.'],
        'Pritika': ['Pretty good.', 'Bland.'],
    },
    index=['Product A', 'Product B'],
)

Unnamed: 0,Prianka,Pritika
Product A,I liked it.,Pretty good.
Product B,It was awful.,Bland.


In [5]:
# A Series is a single labelled column; here the integers 1 through 5 with
# the default 0..4 index.
pd.Series(range(1, 6))

0    1
1    2
2    3
3    4
4    5
dtype: int64

In [6]:
# A named Series with custom row labels: the dict keys become the index and
# `name` labels the Series as a whole.
pd.Series(
    {
        '2015 Sales': 30,
        '2016 Sales': 35,
        '2017 Sales': 40,
    },
    name='Product A',
)

2015 Sales    30
2016 Sales    35
2017 Sales    40
Name: Product A, dtype: int64

# Read data

In [10]:
# Resolve the data folder once so the notebook is not tied to one machine:
# set the TITANIC_DATA_DIR environment variable to point somewhere else.
# The default is the original location, so existing setups behave as before.
DATA_DIR = Path(os.environ.get("TITANIC_DATA_DIR", "C:/Users/prian/OneDrive/Desktop"))

# Load the toy Titanic table into `df`, which the following cells inspect.
df = pd.read_csv(DATA_DIR / "titanic_toy.csv")

In [11]:
# Display the loaded frame; for a long frame pandas elides the middle rows.
df

Unnamed: 0,Age,Fare,Family,Survived
0,22.0,7.2500,1,0
1,38.0,71.2833,1,1
2,26.0,7.9250,0,1
3,35.0,53.1000,1,1
4,35.0,8.0500,0,0
...,...,...,...,...
886,27.0,13.0000,0,0
887,19.0,30.0000,0,1
888,,23.4500,3,0
889,26.0,,0,1


In [12]:
# Preview the first five rows (five is head()'s default row count).
df.head()

Unnamed: 0,Age,Fare,Family,Survived
0,22.0,7.25,1,0
1,38.0,71.2833,1,1
2,26.0,7.925,0,1
3,35.0,53.1,1,1
4,35.0,8.05,0,0


In [13]:
# Dimensions of the frame as a (rows, columns) tuple.
df.shape

(891, 4)

In [14]:
# Select one column as a Series using bracket (label) access.
df['Age']

0      22.0
1      38.0
2      26.0
3      35.0
4      35.0
       ... 
886    27.0
887    19.0
888     NaN
889    26.0
890    32.0
Name: Age, Length: 891, dtype: float64

In [16]:
# Read a single cell in one step with the scalar accessor: row label 0,
# column 'Age'. This avoids chaining two separate indexing operations.
df.at[0, 'Age']

22.0

# Index-based selection

In [17]:
# First row by integer position, returned as a Series keyed by column name.
df.iloc[0]

Age         22.00
Fare         7.25
Family       1.00
Survived     0.00
Name: 0, dtype: float64

In [18]:
# Every row of the first column, selected by integer position.
df.iloc[:, 0]

0      22.0
1      38.0
2      26.0
3      35.0
4      35.0
       ... 
886    27.0
887    19.0
888     NaN
889    26.0
890    32.0
Name: Age, Length: 891, dtype: float64

# Label-based selection

In [19]:
# Look up one value by labels: row label 0 and column label 'Age'.
df.loc[0, 'Age']

22.0

# Setting a column as the index

In [21]:
# Show the frame with the Age column used as the row index. The result is
# only displayed, not assigned, so `df` itself keeps its original index.
df.set_index("Age")

Unnamed: 0_level_0,Fare,Family,Survived
Age,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
22.0,7.2500,1,0
38.0,71.2833,1,1
26.0,7.9250,0,1
35.0,53.1000,1,1
35.0,8.0500,0,0
...,...,...,...
27.0,13.0000,0,0
19.0,30.0000,0,1
,23.4500,3,0
26.0,,0,1


# Summary statistics

In [22]:
# Per-column summary: non-null count, mean, std, min, quartiles and max.
df.describe()

Unnamed: 0,Age,Fare,Family,Survived
count,714.0,846.0,891.0,891.0
mean,29.699118,32.279338,0.904602,0.383838
std,14.526497,50.305796,1.613459,0.486592
min,0.42,0.0,0.0,0.0
25%,20.125,7.8958,0.0,0.0
50%,28.0,14.4542,0.0,0.0
75%,38.0,31.20625,1.0,1.0
max,80.0,512.3292,10.0,1.0


In [23]:
# Mean of each column; missing values are skipped by default.
df.mean()

Age         29.699118
Fare        32.279338
Family       0.904602
Survived     0.383838
dtype: float64

In [25]:
# Distinct values that occur in the Survived column, as a NumPy array.
df['Survived'].unique()

array([0, 1], dtype=int64)

In [26]:
df.Survived.value_counts

<bound method IndexOpsMixin.value_counts of 0      0
1      1
2      1
3      1
4      0
      ..
886    0
887    1
888    0
889    1
890    0
Name: Survived, Length: 891, dtype: int64>

# Dtypes

In [27]:
# Data type of a single column.
df['Age'].dtype

dtype('float64')

In [28]:
# Data type of every column, returned as a Series indexed by column name.
df.dtypes

Age         float64
Fare        float64
Family        int64
Survived      int64
dtype: object

In [29]:
# astype returns a converted copy of the column. Age is already float64, so
# the values shown are unchanged here; `df` is not modified.
df['Age'].astype('float64')

0      22.0
1      38.0
2      26.0
3      35.0
4      35.0
       ... 
886    27.0
887    19.0
888     NaN
889    26.0
890    32.0
Name: Age, Length: 891, dtype: float64

In [30]:
# The row index has a dtype of its own, separate from the columns.
df.index.dtype

dtype('int64')

# Missing values

In [31]:
# Count missing entries per column: isna() marks each missing cell as True
# and sum() adds the Trues up column by column.
df.isna().sum()

Age         177
Fare         45
Family        0
Survived      0
dtype: int64

In [32]:
# Replace missing Survived entries with the integer sentinel 2. An integer
# (not the string "2") keeps the fill value consistent with the column's
# numeric type. The result is a new Series; `df` is not modified.
df.Survived.fillna(2)

0      0
1      1
2      1
3      1
4      0
      ..
886    0
887    1
888    0
889    1
890    0
Name: Survived, Length: 891, dtype: int64

# Combining two datasets

In [34]:
# Re-read the toy table and also load the full training table.
df = pd.read_csv("C:/Users/prian/OneDrive/Desktop/titanic_toy.csv")
dt = pd.read_csv("C:/Users/prian/OneDrive/Desktop/train.csv")

# Stack the two frames row-wise (pandas defaults spelled out). The columns
# are unioned, so a column missing from one frame is NaN for that frame's
# rows, and each frame keeps its own row labels in the result.
pd.concat(objs=[df, dt], axis=0, join='outer')

Unnamed: 0,Age,Fare,Family,Survived,PassengerId,Pclass,Name,Sex,SibSp,Parch,Ticket,Cabin,Embarked
0,22.0,7.2500,1.0,0,,,,,,,,,
1,38.0,71.2833,1.0,1,,,,,,,,,
2,26.0,7.9250,0.0,1,,,,,,,,,
3,35.0,53.1000,1.0,1,,,,,,,,,
4,35.0,8.0500,0.0,0,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
886,27.0,13.0000,,0,887.0,2.0,"Montvila, Rev. Juozas",male,0.0,0.0,211536,,S
887,19.0,30.0000,,1,888.0,1.0,"Graham, Miss. Margaret Edith",female,0.0,0.0,112053,B42,S
888,,23.4500,,0,889.0,3.0,"Johnston, Miss. Catherine Helen ""Carrie""",female,1.0,2.0,W./C. 6607,,S
889,26.0,30.0000,,1,890.0,1.0,"Behr, Mr. Karl Howell",male,0.0,0.0,111369,C148,C
