# Team Pandas Series

In [1]:
# imports
import pandas as pd
import numpy as np



---

### Kombat 1: How to create a series from a list, numpy array and dict?

Create a pandas series from each of the items below: a list, numpy and a dictionary

In [2]:
# Your code

mylist = list('abcde')
myarr = np.arange(5)
mydict = dict(zip(mylist, myarr))
print(mylist)
print(myarr)
print(mydict)



['a', 'b', 'c', 'd', 'e']
[0 1 2 3 4]
{'a': 0, 'b': 1, 'c': 2, 'd': 3, 'e': 4}


In [3]:
# Solution

mylistserie = pd.Series(mylist)
myarrserie = pd.Series(myarr)
mydictserie = pd.Series(mydict)


print(mylistserie)
print(myarrserie)
print(mydictserie)

0    a
1    b
2    c
3    d
4    e
dtype: object
0    0
1    1
2    2
3    3
4    4
dtype: int64
a    0
b    1
c    2
d    3
e    4
dtype: int64


---

### Kombat 2: How to get the items of series A not present in series B?

From ser1 remove items present in ser2.

In [4]:
# Your code

ser1 = pd.Series([1, 2, 3, 4, 5])
ser2 = pd.Series([4, 5, 6, 7, 8])


ser1.isin(ser2)==False

0     True
1     True
2     True
3    False
4    False
dtype: bool

In [5]:
# Solution

ser1 = ser1[ser1.isin(ser2)==False]
ser1


0    1
1    2
2    3
dtype: int64

---

### Kombat 3: How to bin a numeric series to 10 groups of equal size?

Bin the series ser into 10 equal deciles and replace the values with the bin name.

In [6]:
# Your code
ser = pd.Series(np.random.random(20))
ser_sort = ser.sort_values()
ser_sort
cont = 0
bins = {}
myarr = []
index = 0
for i in ser_sort:
    cont += 1
    myarr.append(i)
    if cont % 2 == 0:
        bins[index] = myarr
        myarr = []
        index += 1
    
print(ser)
bins


0     0.521633
1     0.623419
2     0.060227
3     0.506064
4     0.879313
5     0.255040
6     0.834693
7     0.552473
8     0.147937
9     0.865201
10    0.002352
11    0.370619
12    0.011489
13    0.566397
14    0.527220
15    0.634396
16    0.390432
17    0.605059
18    0.428530
19    0.621897
dtype: float64


{0: [0.0023520384546136253, 0.011489248178135791],
 1: [0.060226866438361726, 0.14793703398646685],
 2: [0.2550400904267347, 0.37061929724807796],
 3: [0.3904321904639164, 0.4285299307848406],
 4: [0.5060644953408422, 0.5216334857584513],
 5: [0.5272201066406323, 0.5524732186149769],
 6: [0.5663966655424174, 0.6050586380662445],
 7: [0.6218972040329234, 0.6234186484272014],
 8: [0.6343961890103783, 0.8346933877593286],
 9: [0.8652014040731469, 0.8793128111333124]}

In [7]:
ser = pd.Series(np.random.random(20))
rank = pd.qcut(ser, 
               q = [0, .1, .2, .3, .4, .5, .6, .7, .8, .9, 1], 
               labels = ['1st', '2nd', '3rd', '4th', '5th', '6th', '7th', '8th', '9th', '10th'])

rank


0     10th
1      8th
2      7th
3      7th
4     10th
5      1st
6      5th
7      2nd
8      3rd
9      5th
10     9th
11     2nd
12     4th
13     3rd
14     9th
15     1st
16     4th
17     6th
18     8th
19     6th
dtype: category
Categories (10, object): ['1st' < '2nd' < '3rd' < '4th' ... '7th' < '8th' < '9th' < '10th']

In [8]:
print(ser.head())
print(rank.head())

0    0.959965
1    0.798936
2    0.739373
3    0.756131
4    0.940999
dtype: float64
0    10th
1     8th
2     7th
3     7th
4    10th
dtype: category
Categories (10, object): ['1st' < '2nd' < '3rd' < '4th' ... '7th' < '8th' < '9th' < '10th']


---

### Kombat 4: How to convert a numpy array to a dataframe of given shape?

Reshape the series ser into a dataframe with 7 rows and 5 columns.

In [9]:
# Your code 

ser = pd.Series(np.random.randint(1, 10, 35))
serpd = pd.DataFrame(ser.values.reshape((7, 5)))
serpd


Unnamed: 0,0,1,2,3,4
0,5,4,8,9,6
1,1,7,5,8,3
2,2,5,1,2,9
3,3,2,8,3,8
4,6,6,9,2,3
5,9,5,7,7,6
6,8,8,7,1,3


In [10]:
# Solution

serpd

Unnamed: 0,0,1,2,3,4
0,5,4,8,9,6
1,1,7,5,8,3
2,2,5,1,2,9
3,3,2,8,3,8
4,6,6,9,2,3
5,9,5,7,7,6
6,8,8,7,1,3


---

### Kombat 5: How to create a TimeSeries starting ‘2022-01-02’ and 10 weekends (sundays) after that, having random numbers as values?

In [11]:
# Your code

time_series = pd.Series(np.random.randint(1, 10, 10),  )

dti = pd.date_range(start="2022-01-02", periods=10, freq='7d')
time_series_solution = pd.Series(np.random.randint(1, 10, 10), index=dti)
time_series_solution



2022-01-02    8
2022-01-09    2
2022-01-16    1
2022-01-23    4
2022-01-30    6
2022-02-06    2
2022-02-13    4
2022-02-20    4
2022-02-27    8
2022-03-06    9
Freq: 7D, dtype: int64

In [30]:
# Solution

time_series = time_series_solution
time_series

2022-01-02    8
2022-01-09    2
2022-01-16    1
2022-01-23    4
2022-01-30    6
2022-02-06    2
2022-02-13    4
2022-02-20    4
2022-02-27    8
2022-03-06    9
Freq: 7D, dtype: int64

---

### Kombat 6: How to change column values when importing csv to a dataframe?

Import the boston housing dataset, but while importing change the 'medv' (median house value) column so that values < 25 becomes ‘Low’ and > 25 becomes ‘High’.

In [31]:
# Your code

def change_medv(x):
    if x <= 25:
        return "low"
    else:
        return "high"

    
url = 'https://raw.githubusercontent.com/selva86/datasets/master/BostonHousing.csv'

dataframe = pd.read_csv(url)
dataframe['medv'] = dataframe.apply(lambda x:change_medv(x['medv']), axis=1 )
dataframe


Unnamed: 0,crim,zn,indus,chas,nox,rm,age,dis,rad,tax,ptratio,b,lstat,medv
0,0.00632,18.0,2.31,0,0.538,6.575,65.2,4.0900,1,296,15.3,396.90,4.98,low
1,0.02731,0.0,7.07,0,0.469,6.421,78.9,4.9671,2,242,17.8,396.90,9.14,low
2,0.02729,0.0,7.07,0,0.469,7.185,61.1,4.9671,2,242,17.8,392.83,4.03,high
3,0.03237,0.0,2.18,0,0.458,6.998,45.8,6.0622,3,222,18.7,394.63,2.94,high
4,0.06905,0.0,2.18,0,0.458,7.147,54.2,6.0622,3,222,18.7,396.90,5.33,high
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
501,0.06263,0.0,11.93,0,0.573,6.593,69.1,2.4786,1,273,21.0,391.99,9.67,low
502,0.04527,0.0,11.93,0,0.573,6.120,76.7,2.2875,1,273,21.0,396.90,9.08,low
503,0.06076,0.0,11.93,0,0.573,6.976,91.0,2.1675,1,273,21.0,396.90,5.64,low
504,0.10959,0.0,11.93,0,0.573,6.794,89.3,2.3889,1,273,21.0,393.45,6.48,low


In [32]:
url = 'https://raw.githubusercontent.com/selva86/datasets/master/BostonHousing.csv'

df = pd.read_csv(url, converters = {'medv': lambda x: 'low' if float(x) < 25 else 'high'})
df

Unnamed: 0,crim,zn,indus,chas,nox,rm,age,dis,rad,tax,ptratio,b,lstat,medv
0,0.00632,18.0,2.31,0,0.538,6.575,65.2,4.0900,1,296,15.3,396.90,4.98,low
1,0.02731,0.0,7.07,0,0.469,6.421,78.9,4.9671,2,242,17.8,396.90,9.14,low
2,0.02729,0.0,7.07,0,0.469,7.185,61.1,4.9671,2,242,17.8,392.83,4.03,high
3,0.03237,0.0,2.18,0,0.458,6.998,45.8,6.0622,3,222,18.7,394.63,2.94,high
4,0.06905,0.0,2.18,0,0.458,7.147,54.2,6.0622,3,222,18.7,396.90,5.33,high
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
501,0.06263,0.0,11.93,0,0.573,6.593,69.1,2.4786,1,273,21.0,391.99,9.67,low
502,0.04527,0.0,11.93,0,0.573,6.120,76.7,2.2875,1,273,21.0,396.90,9.08,low
503,0.06076,0.0,11.93,0,0.573,6.976,91.0,2.1675,1,273,21.0,396.90,5.64,low
504,0.10959,0.0,11.93,0,0.573,6.794,89.3,2.3889,1,273,21.0,393.45,6.48,low


In [33]:
# Solution

df

Unnamed: 0,crim,zn,indus,chas,nox,rm,age,dis,rad,tax,ptratio,b,lstat,medv
0,0.00632,18.0,2.31,0,0.538,6.575,65.2,4.0900,1,296,15.3,396.90,4.98,low
1,0.02731,0.0,7.07,0,0.469,6.421,78.9,4.9671,2,242,17.8,396.90,9.14,low
2,0.02729,0.0,7.07,0,0.469,7.185,61.1,4.9671,2,242,17.8,392.83,4.03,high
3,0.03237,0.0,2.18,0,0.458,6.998,45.8,6.0622,3,222,18.7,394.63,2.94,high
4,0.06905,0.0,2.18,0,0.458,7.147,54.2,6.0622,3,222,18.7,396.90,5.33,high
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
501,0.06263,0.0,11.93,0,0.573,6.593,69.1,2.4786,1,273,21.0,391.99,9.67,low
502,0.04527,0.0,11.93,0,0.573,6.120,76.7,2.2875,1,273,21.0,396.90,9.08,low
503,0.06076,0.0,11.93,0,0.573,6.976,91.0,2.1675,1,273,21.0,396.90,5.64,low
504,0.10959,0.0,11.93,0,0.573,6.794,89.3,2.3889,1,273,21.0,393.45,6.48,low


---

### Kombat 7: How to get the nrows, ncolumns, datatype, summary stats of each column of a dataframe?

Get the number of rows, columns, datatype and summary statistics of each column of the Cars93 dataset.

In [34]:
# Your code

url = 'https://raw.githubusercontent.com/selva86/datasets/master/Cars93_miss.csv'

data1 = pd.read_csv(url)

numberofcols = list(data1.columns)
numberofrows = len(data1)
datatype = data1.dtypes
summarystats = data1.describe()


In [35]:
# Solution

print(numberofcols)
print(numberofrows)
print(datatype)
print(summarystats)

['Manufacturer', 'Model', 'Type', 'Min.Price', 'Price', 'Max.Price', 'MPG.city', 'MPG.highway', 'AirBags', 'DriveTrain', 'Cylinders', 'EngineSize', 'Horsepower', 'RPM', 'Rev.per.mile', 'Man.trans.avail', 'Fuel.tank.capacity', 'Passengers', 'Length', 'Wheelbase', 'Width', 'Turn.circle', 'Rear.seat.room', 'Luggage.room', 'Weight', 'Origin', 'Make']
93
Manufacturer           object
Model                  object
Type                   object
Min.Price             float64
Price                 float64
Max.Price             float64
MPG.city              float64
MPG.highway           float64
AirBags                object
DriveTrain             object
Cylinders              object
EngineSize            float64
Horsepower            float64
RPM                   float64
Rev.per.mile          float64
Man.trans.avail        object
Fuel.tank.capacity    float64
Passengers            float64
Length                float64
Wheelbase             float64
Width                 float64
Turn.circle       

---

### Kombat 8: How to slice a DataFrame by column value?

Get every Chevrolet car with a Horsepower lower than 3.0 from the Cars93 dataset.

In [36]:
# Your code

url = 'https://raw.githubusercontent.com/selva86/datasets/master/Cars93_miss.csv'

dataframe = pd.read_csv(url)

dataframe = dataframe[(dataframe['Manufacturer'] == 'Chevrolet') &
                       (dataframe['EngineSize'] < 3.0)]
dataframe



Unnamed: 0,Manufacturer,Model,Type,Min.Price,Price,Max.Price,MPG.city,MPG.highway,AirBags,DriveTrain,...,Passengers,Length,Wheelbase,Width,Turn.circle,Rear.seat.room,Luggage.room,Weight,Origin,Make
11,Chevrolet,Cavalier,Compact,8.5,13.4,18.3,25.0,36.0,,,...,5.0,182.0,101.0,66.0,38.0,25.0,13.0,2490.0,USA,Chevrolet Cavalier
12,Chevrolet,Corsica,Compact,11.4,11.4,11.4,25.0,34.0,Driver only,Front,...,5.0,184.0,103.0,68.0,39.0,26.0,,2785.0,USA,Chevrolet Corsica
14,Chevrolet,Lumina,Midsize,13.4,15.9,18.4,,29.0,,Front,...,6.0,,108.0,71.0,,28.5,16.0,3195.0,USA,Chevrolet Lumina


In [37]:
# Solution

chevy = dataframe
chevy

Unnamed: 0,Manufacturer,Model,Type,Min.Price,Price,Max.Price,MPG.city,MPG.highway,AirBags,DriveTrain,...,Passengers,Length,Wheelbase,Width,Turn.circle,Rear.seat.room,Luggage.room,Weight,Origin,Make
11,Chevrolet,Cavalier,Compact,8.5,13.4,18.3,25.0,36.0,,,...,5.0,182.0,101.0,66.0,38.0,25.0,13.0,2490.0,USA,Chevrolet Cavalier
12,Chevrolet,Corsica,Compact,11.4,11.4,11.4,25.0,34.0,Driver only,Front,...,5.0,184.0,103.0,68.0,39.0,26.0,,2785.0,USA,Chevrolet Corsica
14,Chevrolet,Lumina,Midsize,13.4,15.9,18.4,,29.0,,Front,...,6.0,,108.0,71.0,,28.5,16.0,3195.0,USA,Chevrolet Lumina


---

# FINISH HIM