In [1]:
"""Adding new cases and variables"""
import pandas as pd

# Base frame: three cases (rows) described by three variables (columns).
df = pd.DataFrame({'A': [2, 3, 1],
                   'B': [1, 2, 3],
                   'C': [5, 3, 4]})

# One-row frame holding the new case that will be added to df.
df1 = pd.DataFrame({'A': [4],
                    'B': [4],
                    'C': [4]})

print("ADD NEW CASES IN ROW AND RESET INDEX MANUALY.")
print("Append row [4, 4, 4] to df.")
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0;
# pd.concat is the supported replacement. Like append, it keeps every
# frame's own row labels, so the added row still carries label 0 here.
df = pd.concat([df, df1])
print(df)

print("\nReset index after appended.")
# drop=True discards the old (now duplicated) labels instead of keeping
# them as an extra column; rows are relabelled 0..n-1.
df = df.reset_index(drop=True)
print(df)


print("\n\nADD NEW CASES in ROW AND RESET INDEX AUTOMATYCLIY.")
print("Add row [5, 5, 5] to df.")
# The index was just reset to 0..n-1, so len(df) is the next free label.
# Assigning to a label that does not exist yet enlarges the frame.
df.loc[len(df)] = [5, 5, 5]
print(df)

print("\n\nADD NEW COLUMN.")
print("Add new variables 'D': [1, 2, 3, 4, 5]  to column in df.")
df2 = pd.DataFrame({'D': [1, 2, 3, 4, 5]})
# join aligns the two frames on their row labels (both are 0..4 here).
df = df.join(df2)
print(df)

ADD NEW CASES IN ROW AND RESET INDEX MANUALY.
Append row [4, 4, 4] to df.
   A  B  C
0  2  1  5
1  3  2  3
2  1  3  4
0  4  4  4

Reset index after appended.
   A  B  C
0  2  1  5
1  3  2  3
2  1  3  4
3  4  4  4


ADD NEW CASES in ROW AND RESET INDEX AUTOMATYCLIY.
Add row [5, 5, 5] to df.
   A  B  C
0  2  1  5
1  3  2  3
2  1  3  4
3  4  4  4
4  5  5  5


ADD NEW COLUMN.
Add new variables 'D': [1, 2, 3, 4, 5]  to column in df.
   A  B  C  D
0  2  1  5  1
1  3  2  3  2
2  1  3  4  3
3  4  4  4  4
4  5  5  5  5


In [2]:
"""Removing data."""
import pandas as pd

# Small demo frame: three cases (rows) and three variables (columns).
df = pd.DataFrame({'A': [2, 3, 1],
                   'B': [1, 2, 3],
                   'C': [5, 3, 4]})

print("Remove second row from cases")
# df.index[[1]] looks up the label of the row at position 1, so the drop
# is by position even though DataFrame.drop itself works on labels.
df = df.drop(df.index[[1]])
print(df)

print("\nRemove B column after removed second row from cases. We can remove column ")
# The positional axis argument (df.drop('B', 1)) was deprecated in
# pandas 1.3 and raises TypeError in 2.0; columns= is the explicit,
# version-independent spelling of the same operation.
df = df.drop(columns='B')
print(df)

Remove second row from cases
   A  B  C
0  2  1  5
2  1  3  4

Remove B column after removed second row from cases. We can remove column 
   A  C
0  2  5
2  1  4


In [3]:
"""Sorting and shuffling."""

import pandas as pd
import numpy as np

# Demo data: seven cases with three variables each.
columns_data = {
    'A': [2, 1, 2, 3, 3, 5, 4],
    'B': [1, 2, 3, 5, 4, 2, 5],
    'C': [5, 3, 4, 1, 1, 2, 3],
}
df = pd.DataFrame(columns_data)
print("Initial array.")
print(df)

print("\nSorted array.")
# Order by A first, then by B inside equal A values, both ascending.
# Rows keep their original labels, so the index is no longer 0..6 in order.
df = df.sort_values(['A', 'B'], ascending=[True, True])
print(df)

print("\nSave index to variable and shafle it.")
# Copy the row labels into a plain Python list and permute it in place.
# No seed is set in this cell, so the order differs from run to run.
index = df.index.to_list()
np.random.shuffle(index)
print(index)

print("\nInsert saved variable index into the data frame.")
# Selecting by the permuted labels reorders the rows accordingly.
df = df.loc[index, :]
print(df)

print("\nReset the inserted index without shuffled df. If drop = False, it adds new column with orevious index.")
# drop=True throws the shuffled labels away and relabels the rows 0..n-1.
df = df.reset_index(drop=True)
print(df)

Initial array.
   A  B  C
0  2  1  5
1  1  2  3
2  2  3  4
3  3  5  1
4  3  4  1
5  5  2  2
6  4  5  3

Sorted array.
   A  B  C
1  1  2  3
0  2  1  5
2  2  3  4
4  3  4  1
3  3  5  1
6  4  5  3
5  5  2  2

Save index to variable and shafle it.
[0, 3, 1, 4, 2, 6, 5]

Insert saved variable index into the data frame.
   A  B  C
0  2  1  5
3  3  5  1
1  1  2  3
4  3  4  1
2  2  3  4
6  4  5  3
5  5  2  2

Reset the inserted index without shuffled df. If drop = False, it adds new column with orevious index.
   A  B  C
0  2  1  5
1  3  5  1
2  1  2  3
3  3  4  1
4  2  3  4
5  4  5  3
6  5  2  2


In [4]:
"""Aggregating Data at Any level."""

import pandas as pd
import numpy as np

# 'Map' is the grouping key; 'Values' holds the numbers to aggregate.
df = pd.DataFrame({'Map': [0, 0, 0, 1, 1, 2, 2],
                   'Values': [1, 2, 3, 5, 4, 2, 5]})

# transform() returns one value per original row, so every row receives
# the aggregate of the group it belongs to.
#
# The aggregations are named by string instead of passing np.sum /
# np.mean / np.var: handing NumPy callables to transform() is deprecated
# since pandas 2.1, and for the variance the two are not equivalent
# (groupby 'var' uses ddof=1, np.var defaults to ddof=0).

# Sum - sum of values for the same Map values
df['S'] = df.groupby('Map')['Values'].transform('sum')

# Mean - mean of values for the same Map values
df['M'] = df.groupby('Map')['Values'].transform('mean')

# Var - sample variance of values for the same Map values:
# the sum of squared deviations from the group mean divided by (n - 1).
df['V'] = df.groupby('Map')['Values'].transform('var')

print(df)

   Map  Values  S    M    V
0    0       1  6  2.0  1.0
1    0       2  6  2.0  1.0
2    0       3  6  2.0  1.0
3    1       5  9  4.5  0.5
4    1       4  9  4.5  0.5
5    2       2  7  3.5  4.5
6    2       5  7  3.5  4.5
