In [47]:
import pandas as pd
import numpy as np
import random

## Creation And Appending Columns

In [61]:
# Seed both random sources used below so that Restart & Run All rebuilds
# exactly the same frame (np.random feeds the numeric columns, the stdlib
# `random` module feeds the letter column).
SEED = 0
np.random.seed(SEED)
random.seed(SEED)

rows = 40
# Four columns of uniform [0, 1) samples, named at construction time.
test = pd.DataFrame(np.random.rand(rows, 4), columns=['First', 'Second', 'Third', 'Fourth'])
# Append a label column: one letter per row, drawn with replacement from A/B/C.
test['Letters'] = random.choices(['A', 'B', 'C'], k=rows)

In [85]:
# Preview the first three rows to check the renamed columns and the appended 'Letters' column.
test.head(3)

Unnamed: 0,First,Second,Third,Fourth,Letters
0,0.32612,0.270747,0.223164,0.511374,B
1,0.885552,0.396075,0.546918,0.769559,B
2,0.272539,0.310817,0.004841,0.671487,C


## Stats And Column Correlation

In [63]:
# Summary statistics (count, mean, std, min, quartiles, max) per column;
# the string 'Letters' column does not appear in the result.
test.describe()

Unnamed: 0,First,Second,Third,Fourth
count,40.0,40.0,40.0,40.0
mean,0.539003,0.559781,0.460424,0.535928
std,0.295469,0.272038,0.293742,0.255936
min,0.000999,0.074894,0.00252,0.017619
25%,0.333704,0.332991,0.219869,0.327932
50%,0.57694,0.517386,0.459933,0.562842
75%,0.775581,0.777104,0.691623,0.686016
max,0.957506,0.982695,0.988651,0.976185


In [86]:
# Pairwise correlation between the numeric columns only. The string 'Letters'
# column is excluded explicitly: pandas >= 2.0 no longer drops non-numeric
# columns silently in corr() and raises on them instead.
test.select_dtypes(include='number').corr()

Unnamed: 0,First,Second,Third,Fourth
First,1.0,-0.280382,0.0325,0.035918
Second,-0.280382,1.0,-0.10067,-0.197873
Third,0.0325,-0.10067,1.0,-0.124244
Fourth,0.035918,-0.197873,-0.124244,1.0


In [64]:
# How many rows carry each letter label, most frequent first.
test['Letters'].value_counts()

B    15
C    14
A    11
Name: Letters, dtype: int64

## Filtering And Subsets

In [65]:
# Column subset: keep every row, but only the 'First' and 'Second' columns.
sub_test = test.loc[:, ['First', 'Second']]
sub_test.head(3)

Unnamed: 0,First,Second
0,0.32612,0.270747
1,0.885552,0.396075
2,0.272539,0.310817


In [66]:
# Row filter: 'First' strictly above 0.5 AND label is either 'C' or 'D'.
# Note: the creation cell only draws labels from A/B/C, so the 'D' alternative
# never matches here; it is kept to illustrate a multi-value condition.
filtered = test[test['First'].gt(0.5) & test['Letters'].isin(['C', 'D'])]
filtered.describe()

Unnamed: 0,First,Second,Third,Fourth
count,6.0,6.0,6.0,6.0
mean,0.666485,0.704682,0.406482,0.50463
std,0.139671,0.268261,0.297889,0.160098
min,0.500508,0.33549,0.00252,0.330577
25%,0.582448,0.500818,0.229234,0.377211
50%,0.649861,0.777859,0.393597,0.478553
75%,0.719263,0.903752,0.587134,0.628916
max,0.896309,0.982695,0.823089,0.717633


## Grouping And Pivoting

In [67]:
# Mean of every numeric column within each letter group. The built-in
# reduction replaces agg(np.mean): recent pandas versions emit a FutureWarning
# when a NumPy callable is passed to agg and recommend the named reduction.
test.groupby('Letters').mean()

Unnamed: 0_level_0,First,Second,Third,Fourth
Letters,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
A,0.548252,0.522337,0.401937,0.474739
B,0.644586,0.469623,0.510892,0.578143
C,0.418611,0.685798,0.452305,0.538774


In [69]:
# Same per-letter means via pivot_table, restricted to 'First' and 'Second'.
# aggfunc is given as the string 'mean' rather than np.mean, which recent
# pandas versions flag with a FutureWarning.
test.pivot_table(index='Letters', values=['First', 'Second'], aggfunc='mean')

Unnamed: 0_level_0,First,Second
Letters,Unnamed: 1_level_1,Unnamed: 2_level_1
A,0.548252,0.522337
B,0.644586,0.469623
C,0.418611,0.685798


## Joining

In [80]:
# Second frame with the same number of rows: two uniform [0, 1) columns,
# named at construction time.
test2 = pd.DataFrame(np.random.rand(rows, 2), columns=['Extra', 'More'])
# Label column drawn from A/B/F, so it only partly overlaps with test's A/B/C labels.
test2['More-Letters'] = random.choices(['A', 'B', 'F'], k=rows)

In [81]:
# Side-by-side concatenation: rows are aligned on the shared index and the
# columns of test2 are appended to the right of test's columns.
big = pd.concat([test, test2], axis='columns')
big.head(2)

Unnamed: 0,First,Second,Third,Fourth,Letters,Extra,More,More-Letters
0,0.32612,0.270747,0.223164,0.511374,B,0.685181,0.408171,B
1,0.885552,0.396075,0.546918,0.769559,B,0.583939,0.296496,F


In [82]:
# Left join on the letter label: both frames are re-indexed by their letter
# column and matched on it. Labels repeat on both sides, so every row of
# `test` is paired with every `test2` row sharing its letter (hence the
# repeated A rows). 'C' is not among test2's labels, so those rows keep NaN
# in 'Extra' and 'More'.
test.set_index('Letters').join(test2.set_index('More-Letters'), how='left')

Unnamed: 0,First,Second,Third,Fourth,Extra,More
A,0.913613,0.256746,0.967909,0.953704,0.043765,0.414215
A,0.913613,0.256746,0.967909,0.953704,0.340649,0.794225
A,0.913613,0.256746,0.967909,0.953704,0.759291,0.176276
A,0.913613,0.256746,0.967909,0.953704,0.243687,0.442831
A,0.913613,0.256746,0.967909,0.953704,0.573999,0.597043
...,...,...,...,...,...,...
C,0.700241,0.740274,0.364710,0.415463,,
C,0.001109,0.937937,0.769465,0.021321,,
C,0.043216,0.959057,0.738131,0.517016,,
C,0.436670,0.090161,0.612788,0.561160,,
