# Reverse Row Order



In [2]:
import seaborn as sns
import pandas as pd

In [3]:
df = sns.load_dataset('tips')
df.head()

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.5,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4


In [4]:
# In Reverse order
df.loc[::-1].head()

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
243,18.78,3.0,Female,No,Thur,Dinner,2
242,17.82,1.75,Male,No,Sat,Dinner,2
241,22.67,2.0,Male,Yes,Sat,Dinner,2
240,27.18,2.0,Female,Yes,Sat,Dinner,2
239,29.03,5.92,Male,No,Sat,Dinner,3


In [5]:
# reset only index agian
df.loc[::-1].reset_index(drop=True).head()

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,18.78,3.0,Female,No,Thur,Dinner,2
1,17.82,1.75,Male,No,Sat,Dinner,2
2,22.67,2.0,Male,Yes,Sat,Dinner,2
3,27.18,2.0,Female,Yes,Sat,Dinner,2
4,29.03,5.92,Male,No,Sat,Dinner,3


# Reverse Column Order

In [6]:
df.loc[:, ::-1].head()

Unnamed: 0,size,time,day,smoker,sex,tip,total_bill
0,2,Dinner,Sun,No,Female,1.01,16.99
1,3,Dinner,Sun,No,Male,1.66,10.34
2,3,Dinner,Sun,No,Male,3.5,21.01
3,2,Dinner,Sun,No,Male,3.31,23.68
4,4,Dinner,Sun,No,Female,3.61,24.59


In [7]:
# Select a column by its data type
df.dtypes

total_bill     float64
tip            float64
sex           category
smoker        category
day           category
time          category
size             int64
dtype: object

In [8]:
# only select those columns which have int data type
df.select_dtypes(include=['number']).head()

Unnamed: 0,total_bill,tip,size
0,16.99,1.01,2
1,10.34,1.66,3
2,21.01,3.5,3
3,23.68,3.31,2
4,24.59,3.61,4


In [9]:
# only select those columns which have category or other type
df.select_dtypes(include=['category', 'float']).head()

Unnamed: 0,total_bill,tip,sex,smoker,day,time
0,16.99,1.01,Female,No,Sun,Dinner
1,10.34,1.66,Male,No,Sun,Dinner
2,21.01,3.5,Male,No,Sun,Dinner
3,23.68,3.31,Male,No,Sun,Dinner
4,24.59,3.61,Female,No,Sun,Dinner


In [10]:
# only these columns are excluded
x = df.select_dtypes(exclude=['float', 'int']).head()
x

Unnamed: 0,sex,smoker,day,time
0,Female,No,Sun,Dinner
1,Male,No,Sun,Dinner
2,Male,No,Sun,Dinner
3,Male,No,Sun,Dinner
4,Female,No,Sun,Dinner


In [11]:
x.dtypes

sex       category
smoker    category
day       category
time      category
dtype: object

In [12]:
df.dtypes

total_bill     float64
tip            float64
sex           category
smoker        category
day           category
time          category
size             int64
dtype: object

# Convert string to integer

In [13]:
df = pd.DataFrame({'col_A': ['1.8', '2.9', '3', '6', '7', '8', '9', '22'],
                   'col_B': ['4', '5', '6', '7', '8', '9', '10', '11']})
df

Unnamed: 0,col_A,col_B
0,1.8,4
1,2.9,5
2,3.0,6
3,6.0,7
4,7.0,8
5,8.0,9
6,9.0,10
7,22.0,11


In [14]:
df.dtypes

col_A    object
col_B    object
dtype: object

In [15]:
# convert data type
df.astype({'col_A': 'float64'}).dtypes

col_A    float64
col_B     object
dtype: object

In [16]:
# by other method 
pd.to_numeric(df['col_A'], errors='coerce')
pd.to_numeric(df['col_B'], errors='coerce')


0     4
1     5
2     6
3     7
4     8
5     9
6    10
7    11
Name: col_B, dtype: int64

# Reduce Dataframe size 

In [17]:
import seaborn as sns
df = sns.load_dataset('tips')
df.shape

(244, 7)

In [27]:
df.sample(frac=0.5).shape
df.info(memory_usage='deep')

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 244 entries, 0 to 243
Data columns (total 7 columns):
 #   Column      Non-Null Count  Dtype   
---  ------      --------------  -----   
 0   total_bill  244 non-null    float64 
 1   tip         244 non-null    float64 
 2   sex         244 non-null    category
 3   smoker      244 non-null    category
 4   day         244 non-null    category
 5   time        244 non-null    category
 6   size        244 non-null    int64   
dtypes: category(4), float64(2), int64(1)
memory usage: 7.8 KB


In [19]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 244 entries, 0 to 243
Data columns (total 7 columns):
 #   Column      Non-Null Count  Dtype   
---  ------      --------------  -----   
 0   total_bill  244 non-null    float64 
 1   tip         244 non-null    float64 
 2   sex         244 non-null    category
 3   smoker      244 non-null    category
 4   day         244 non-null    category
 5   time        244 non-null    category
 6   size        244 non-null    int64   
dtypes: category(4), float64(2), int64(1)
memory usage: 7.4 KB
