# **DataFrame Basics II**

### **Filtering DataFrames by one Condition**

In [None]:
import pandas as pd

In [None]:
titanic = pd.read_csv('titanic.csv')

In [None]:
titanic.head()

In [None]:
titanic.info()

In [None]:
titanic.dtypes

In [None]:
titanic.age.head()

In [None]:
titanic.sex == "male"

In [None]:
# Boolean-mask indexing: keep only the rows where `sex` equals "male".
males1 = titanic[titanic["sex"] == "male"]

In [None]:
males1.head()

In [None]:
# Store the boolean mask under a name so later cells can reuse it.
male_filter = titanic["sex"] == "male"
# Rows where the mask is True, all columns.
males2 = titanic.loc[male_filter, :]

In [None]:
males2.head()

In [None]:
# Restrict to the two columns of interest, then keep the rows flagged by male_filter.
males3 = titanic[["age", "fare"]].loc[male_filter]

In [None]:
males3.head()

In [None]:
# Mask that is True for every row whose `sex` value is "female".
female_filter = titanic["sex"] == "female"
# Select the flagged rows via label-based indexing.
females = titanic.loc[female_filter]

In [None]:
females

In [None]:
males2.dtypes

In [None]:
# Compare every column dtype of males2 with `object`; the result is a boolean
# Series with one entry per column.
object_filter = males2.dtypes == object
# `~` inverts the mask, so only the columns that are NOT object-typed are kept.
males2_numbers = males2.loc[:, ~object_filter]

In [None]:
males2_numbers

In [None]:
# Row mask and column mask combined in a single .loc call.
# NOTE(review): object_filter was built from males2.dtypes; this relies on
# titanic having the same column labels as males2 — confirm.
males = titanic.loc[male_filter, ~object_filter]

In [None]:
males.head()

### **Filtering DataFrames by many Conditions (AND)**

In [None]:
import pandas as pd

In [None]:
titanic = pd.read_csv('titanic.csv')

In [None]:
titanic.head()

In [None]:
# Mask: True where the passenger is male.
males_filter = titanic.sex == "male"
# Keep the male passengers and preview the first rows.
males = titanic[males_filter]
males.head()

In [None]:
# Mask: True where age is strictly greater than 14.
age_filter = titanic.age > 14
# Keep the rows that pass the age condition and preview them.
grown_ups = titanic[age_filter]
grown_ups.head()

In [None]:
# Element-wise AND: a row is kept only when both masks are True for it.
grown_males = titanic[males_filter & age_filter]
grown_males.head()

In [None]:
grown_males.describe()

### **Filtering DataFrames by many Conditions (OR)**

In [None]:
import pandas as pd

In [None]:
titanic = pd.read_csv('titanic.csv')
titanic.head()

In [None]:
# Mask: True where the passenger is female; preview the first mask values.
females_filter = titanic["sex"] == "female"
females_filter.head()

In [None]:
# Mask: True where age is strictly below 14; preview the first mask values.
child_filter = titanic.age < 14
child_filter.head()

In [None]:
# Element-wise OR: a row is kept when at least one of the two masks is True.
woman_or_child = titanic[females_filter | child_filter]

In [None]:
woman_or_child.head()

In [None]:
woman_or_child.describe()

In [None]:
# Reads summer.csv and displays the resulting DataFrame as the cell output.
# The result is not assigned to a name in this cell.
pd.read_csv('summer.csv')

## **Advanced Filtering with between(), isin() and ~**

In [None]:
import pandas as pd

In [None]:
summer = pd.read_csv('summer.csv')

In [None]:
summer.head()

In [None]:
summer.info()

In [None]:
# Rows whose Year is exactly 1988.
games_1988 = summer[summer["Year"] == 1988]

In [None]:
games_1988.head()

In [None]:
games_1988.info()

In [None]:
# Rows whose Year is 1992 or later.
games_since_1992 = summer[summer["Year"] >= 1992]

In [None]:
games_since_1992

In [None]:
games_since_1992.head()

In [None]:
games_since_1992.tail()

In [None]:
# Rows whose Year lies in the closed interval [1990, 1999].
games_90s = summer.loc[summer["Year"].between(1990, 1999, inclusive="both")]

In [None]:
games_90s

In [None]:
# Rows whose Year is one of the listed values.
selected_games = summer[summer["Year"].isin([1976, 1988])]

In [None]:
selected_games

In [None]:
# Two masks over the already-filtered frame, combined with element-wise AND.
discipline = selected_games["Discipline"] == "Weightlifting"
country = selected_games["Country"] == "BUL"
selected_games[discipline & country]

## **any()** and **all()**

In [None]:
import pandas as pd

In [None]:
titanic = pd.read_csv('titanic.csv')

In [None]:
titanic.head()

In [None]:
(titanic.sex == 'male').any()

In [None]:
(titanic.sex == 'male').all()

In [None]:
(titanic.age == 80.0).any()

In [None]:
titanic.loc[titanic.age == 80.0]

In [None]:
# Small example Series mixing positive, negative, zero and fractional values.
pd_series = pd.Series(data=[1, 5, -5, 0, -1, 0.5])

In [None]:
pd_series.any()

In [None]:
pd_series.all()

In [None]:
# Build the sorted Series of distinct fares in a single, re-runnable cell.
# The intermediate array gets its own name so that `fares` is only ever bound
# to one kind of object (a Series); previously `fares` was first an array and
# later a Series, so re-running the in-place sort step after the conversion
# would fail.
fare_values = titanic.fare.unique()   # distinct fare values
fare_values.sort()                    # sorts the array in place
fares = pd.Series(fare_values)

In [None]:
fares.head()

In [None]:
titanic.fare.all()

## **Removing columns**

In [None]:
import pandas as pd

In [None]:
summer = pd.read_csv('summer.csv')

In [None]:
summer.head()

In [None]:
# Displays the result of dropping the 'Sport' column. The result is not
# assigned to anything, so `summer` itself is not rebound in this cell.
summer.drop(columns = 'Sport')

In [None]:
summer.head()

In [None]:
# Same idea with a list of labels: several columns are dropped at once.
# Again the result is only displayed, not assigned.
summer.drop(columns = ['Sport', 'Discipline'])

In [None]:
summer.head()

In [None]:
# Equivalent spelling using `labels` plus `axis`; axis = 1 targets columns.
# The result is only displayed, not assigned.
summer.drop(labels = 'Event', axis = 1)
# summer.drop(labels = 'Event', axis = "columns") - alternative approach

In [None]:
summer.head()

In [None]:
# `del` removes the 'Sport' column from `summer` itself (in place).
# Running this cell a second time fails because the column is already gone.
del summer['Sport']

In [None]:
summer.head()

In [None]:
summer = pd.read_csv('summer.csv')

In [None]:
summer.head()

In [None]:
# Keep columns by listing the ones wanted instead of dropping the others.
summer[['Year', 'City', 'Athlete', 'Country', 'Gender', 'Event', 'Medal']]

## **Removing rows**

In [None]:
import pandas as pd

In [None]:
summer = pd.read_csv('summer.csv',index_col = 'Athlete')

In [None]:
summer.head(10)

In [None]:
summer.drop(index = 'HAJOS, Alfred').head(10)

In [None]:
summer.drop(index = ['HAJOS, Alfred', 'DRIVAS, Dimitrios']).head(10)

In [None]:
# Drop rows by index label using the `labels` / `axis` spelling, then preview.
summer.drop(labels = "HERSCHMANN, Otto", axis = 'index').head(10)

In [None]:
summer.loc[summer.Year == 1996]

In [None]:
filter1 = summer['Year'] == 1996
filter2 = summer['Sport'] == 'Aquatics'
# De Morgan: "not (A or B)" is the same as "(not A) and (not B)".
summer.loc[~filter1 & ~filter2]

## **Adding new Columns to a DataFrame**

In [None]:
import pandas as pd

In [None]:
titanic = pd.read_csv('titanic.csv')

In [None]:
titanic.head()

In [None]:
# Looking up a column that has not been created yet raises KeyError.
# The error is the point of this cell, so catch it and print the message;
# that way the notebook still runs from top to bottom without stopping here.
try:
    titanic['Zeros']
except KeyError as err:
    print(f"KeyError: {err}")

In [None]:
titanic['Zeros'] = 0

In [None]:
titanic.head()

In [None]:
titanic['Zeros_2'] = '0'

In [None]:
titanic.head()

In [None]:
titanic.dtypes

In [None]:
# Attribute access to a column that does not exist raises AttributeError.
# Catch it and print the message so a full run of the notebook is not
# interrupted by this demonstration.
try:
    titanic.Ones
except AttributeError as err:
    print(f"AttributeError: {err}")

In [None]:
# Create the column with bracket notation. Attribute assignment
# (`titanic.Ones = 1`) only sets a plain Python attribute on the object and
# does not add a column to the DataFrame, so it must not be used to create
# new columns.
titanic['Ones'] = 1

In [None]:
titanic.head()

In [None]:
titanic.Ones

## **Creating Columns based on other Columns**

In [None]:
import pandas as pd

In [None]:
titanic = pd.read_csv('titanic.csv')

In [None]:
titanic.head()

In [None]:
1912 - titanic.age

In [None]:
# New column derived element-wise from `age`: 1912 minus each passenger's age.
titanic['YoB'] = 1912 - titanic['age']

In [None]:
titanic.head()

In [None]:
# New column: element-wise sum of the `sibsp` and `parch` columns.
titanic['relatives'] = titanic.sibsp + titanic.parch

In [None]:
titanic.head()

In [None]:
# Remove the two source columns now that `relatives` has been derived.
# Rebinding the name to the returned frame replaces `inplace = True`, which
# mutates the existing object behind the reader's back and cannot be chained.
titanic = titanic.drop(columns = ['sibsp', 'parch'])

In [None]:
titanic.head()

In [None]:
# Multiplier applied to `fare` in the next cell to build the `ia_fare` column.
inflation_factor = 10

In [None]:
# New column: every fare scaled by the constant inflation factor.
titanic['ia_fare'] = inflation_factor * titanic['fare']

In [None]:
titanic.head()

## **Adding Columns with insert()**

In [1]:
import pandas as pd

In [2]:
titanic = pd.read_csv('titanic.csv')

In [3]:
titanic.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
0,0,3,male,22.0,1,0,7.25,S,
1,1,1,female,38.0,1,0,71.2833,C,C
2,1,3,female,26.0,0,0,7.925,S,
3,1,1,female,35.0,1,0,53.1,S,C
4,0,3,male,35.0,0,0,8.05,S,


In [4]:
# Stand-alone Series: element-wise sum of the `sibsp` and `parch` columns.
relatives = titanic.sibsp + titanic.parch

In [5]:
relatives.head()

0    1
1    1
2    0
3    1
4    0
dtype: int64

In [6]:
# Insert the Series as a new column named 'relatives' at column position 6.
titanic.insert(loc = 6, column = 'relatives', value = relatives)

In [7]:
titanic.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,relatives,fare,embarked,deck
0,0,3,male,22.0,1,0,1,7.25,S,
1,1,1,female,38.0,1,0,1,71.2833,C,C
2,1,3,female,26.0,0,0,0,7.925,S,
3,1,1,female,35.0,1,0,1,53.1,S,C
4,0,3,male,35.0,0,0,0,8.05,S,


## **Creating DataFrames from Scratch with pd.DataFrame()**

In [8]:
import pandas as pd