# **Practice 8. Pandas**

In [1]:
import numpy as np
import pandas as pd
from pprint import pprint

## Low Level

### Task 1

In [123]:
ser1 = pd.Series(list('abcedfghijklmnopqrstuvwxyz'))
ser2 = pd.Series(np.arange(26))

dframe = pd.DataFrame({'first_column': ser1 , 'second_column': ser2})
dframe

Unnamed: 0,first_column,second_column
0,a,0
1,b,1
2,c,2
3,e,3
4,d,4
5,f,5
6,g,6
7,h,7
8,i,8
9,j,9


### Task 2

In [124]:
ser1 = pd.Series([1, 2, 3, 4, 5])
ser2 = pd.Series([4, 5, 6, 7, 8])
res = np.setdiff1d(ser1, ser2)
res

array([1, 2, 3], dtype=int64)

### Task 3

In [125]:
ser = pd.Series(['how', 'to', 'kick', 'ass?'])
ser.str.upper()

0     HOW
1      TO
2    KICK
3    ASS?
dtype: object

### Task 4

In [126]:
ser = pd.Series(np.random.normal(10, 5, 25))
min_ = ser.min()
percent_25 = ser.quantile(0.25)
percent_50 = ser.quantile(0.5)
percent_75 = ser.quantile(0.75)
print(ser)
print(f'Minimum: {min_}\n25th percentile: {percent_25} \nMedian: {percent_50}\n75th percentile: {percent_25}')

0     17.398154
1      4.493193
2     10.447645
3     16.076448
4      1.203583
5     15.722318
6      7.505888
7      2.057065
8     12.220450
9      6.827949
10    12.300609
11    15.346135
12    10.113205
13    16.214183
14    17.904318
15     3.544937
16    17.859056
17    12.263202
18    15.803909
19    12.176382
20     1.970907
21    10.803048
22    18.895086
23    10.660492
24     9.554483
dtype: float64
Minimum: 1.2035830903448073
25th percentile: 7.5058882329953365 
Median: 12.176382020407061
75th percentile: 7.5058882329953365


### Task 5

In [127]:
ser = pd.Series(['01 Jan 2010', '02-02-2011', '20120303', '2013/04/04', '2014-05-05', '2015-06-06T12:20'])
pd.to_datetime(ser)

0   2010-01-01 00:00:00
1   2011-02-02 00:00:00
2   2012-03-03 00:00:00
3   2013-04-04 00:00:00
4   2014-05-05 00:00:00
5   2015-06-06 12:20:00
dtype: datetime64[ns]

## Medium Level

### Task 1

In [128]:
ser1 = pd.Series([1, 2, 3, 4, 5])
ser2 = pd.Series([4, 5, 6, 7, 8])
first = ser1[~ser1.isin(ser2)]
second = ser2[~ser2.isin(ser1)]
res = first.append(second, ignore_index=True)
print(res)

0    1
1    2
2    3
3    6
4    7
5    8
dtype: int64


### Task 2

In [129]:
def func_top_n(n):
    def top_n(ser):
        tops = ser.drop_duplicates().nlargest(n)
        ser[ser<tops.min()] = 'Other'
        return ser
    return top_n

np.random.RandomState(100)
ser = pd.Series(np.random.randint(1, 5, [12]))
top2 = func_top_n(2)
print(top2(ser))

0         3
1         3
2     Other
3         3
4         3
5     Other
6     Other
7     Other
8         3
9         3
10        3
11        4
dtype: object


### Task 3

In [130]:
ser1 = pd.Series(range(5))
ser2 = pd.Series(list('abcde'))
dframe = pd.DataFrame([*ser1, *ser2])
print('Vertically :')
pprint(dframe)
df = pd.concat([ser1,ser2], axis = 1)
print('Horizontally :')
pprint(df)

Vertically :
   0
0  0
1  1
2  2
3  3
4  4
5  a
6  b
7  c
8  d
9  e
Horizontally :
   0  1
0  0  a
1  1  b
2  2  c
3  3  d
4  4  e


### Task 4

In [131]:
ser1 = pd.Series([10, 9, 6, 5, 3, 1, 12, 8, 13])
ser2 = pd.Series([1, 3, 10, 13])
[pd.Index(ser1).get_loc(i) for i in ser2]

[5, 4, 0, 8]

### Task 5

In [132]:
fruit = pd.Series(np.random.choice(['apple', 'banana', 'carrot'], 10))
weights = pd.Series(np.linspace(1, 10, 10))
print(weights.tolist())
print(fruit.tolist())
weights.groupby(fruit).mean()

[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0]
['apple', 'carrot', 'apple', 'apple', 'carrot', 'carrot', 'apple', 'carrot', 'banana', 'apple']


apple     5.00
banana    9.00
carrot    5.25
dtype: float64

## Advanced Level

### Task 1

In [66]:
ser = pd.Series(['Apple', 'Orange', 'Plan', 'Python', 'Money'])
vowels = set('aeiou'.upper())
ser_up = ser.str.upper()
sers = [ser_up.str.count(i) for i in vowels]
sum_ser= sum(sers)
res = sum_ser.where(sum_ser>=2).dropna()
result = ser[res.index]
result

0     Apple
1    Orange
4     Money
dtype: object

### Task 2

In [74]:
emails = pd.Series(['buying books at amazon.com', 'rameses@egypt.com', 'matt@t.co', 'narendra@modi.com'])
pattern ='[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\\.[A-Za-z]{2,4}'
emails.where(emails.str.contains(pattern, regex=True)).dropna()

1    rameses@egypt.com
2            matt@t.co
3    narendra@modi.com
dtype: object

### Task 3

In [135]:
ser = pd.Series([2, 10, 3, 4, 9, 10, 2, 7, 3])
from scipy.signal import argrelextrema
res = argrelextrema(ser.values, np.greater)[0]
res

array([1, 5, 7], dtype=int64)

### Task 4

In [136]:
from requests import get
from io import StringIO
url = 'https://raw.githubusercontent.com/selva86/datasets/master/BostonHousing.csv'
r = get(url)

In [161]:
data_csv = StringIO(r.text)
DataFrame = pd.read_csv(data_csv)
DataFrame['medv'] = DataFrame['medv'].map(lambda x: 'High' if x>25 else 'Low' if x<25 else x)
DataFrame

Unnamed: 0,crim,zn,indus,chas,nox,rm,age,dis,rad,tax,ptratio,b,lstat,medv
0,0.00632,18.0,2.31,0,0.538,6.575,65.2,4.0900,1,296,15.3,396.90,4.98,Low
1,0.02731,0.0,7.07,0,0.469,6.421,78.9,4.9671,2,242,17.8,396.90,9.14,Low
2,0.02729,0.0,7.07,0,0.469,7.185,61.1,4.9671,2,242,17.8,392.83,4.03,High
3,0.03237,0.0,2.18,0,0.458,6.998,45.8,6.0622,3,222,18.7,394.63,2.94,High
4,0.06905,0.0,2.18,0,0.458,7.147,54.2,6.0622,3,222,18.7,396.90,5.33,High
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
501,0.06263,0.0,11.93,0,0.573,6.593,69.1,2.4786,1,273,21.0,391.99,9.67,Low
502,0.04527,0.0,11.93,0,0.573,6.120,76.7,2.2875,1,273,21.0,396.90,9.08,Low
503,0.06076,0.0,11.93,0,0.573,6.976,91.0,2.1675,1,273,21.0,396.90,5.64,Low
504,0.10959,0.0,11.93,0,0.573,6.794,89.3,2.3889,1,273,21.0,393.45,6.48,Low


### Task 5

In [73]:
def swap_columns(df, col1, col2):
    if col1 == col2:
        return
    col_list = list(df)
    col1_index = col_list.index(col1)
    col2_index = col_list.index(col2)

    col_list[col1_index], col_list[col2_index] = col_list[col2_index], col_list[col1_index]
    df.columns = col_list 
    
df = pd.DataFrame(np.arange(20).reshape(-1, 5), columns=list('abcde'))
print(df)
swap_columns(df, 'a', 'd')
print(df)
df.sort_index(axis=1, ascending=False)

    a   b   c   d   e
0   0   1   2   3   4
1   5   6   7   8   9
2  10  11  12  13  14
3  15  16  17  18  19
    d   b   c   a   e
0   0   1   2   3   4
1   5   6   7   8   9
2  10  11  12  13  14
3  15  16  17  18  19


Unnamed: 0,e,d,c,b,a
0,4,0,2,1,3
1,9,5,7,6,8
2,14,10,12,11,13
3,19,15,17,16,18
