# Team Pandas Series

In [1]:
# imports

import pandas as pd
import numpy as np

---

### Kombat 1: How to create a series from a list, numpy array and dict?

Create a pandas Series from each of the items below: a list, a NumPy array, and a dictionary.

In [2]:
# Input: the same five items held as a list, a NumPy array and a dict

mylist = ['a', 'b', 'c', 'd', 'e']
myarr = np.arange(len(mylist))
mydict = {letter: number for letter, number in zip(mylist, myarr)}

In [3]:
# Fight!!!
# pd.Series takes each container as-is; for the dict, the keys become the index.

mylistserie, myarrserie, mydictserie = (
    pd.Series(source) for source in (mylist, myarr, mydict)
)

In [4]:
# Output: print the three Series one after another

print(mylistserie, myarrserie, mydictserie, sep='\n')

0    a
1    b
2    c
3    d
4    e
dtype: object
0    0
1    1
2    2
3    3
4    4
dtype: int32
a    0
b    1
c    2
d    3
e    4
dtype: int32


---

### Kombat 2: How to get the items of series A not present in series B?

From ser1 remove items present in ser2.

In [5]:
# Input: two integer Series that share the values 4 and 5

ser1 = pd.Series(list(range(1, 6)))
ser2 = pd.Series(list(range(4, 9)))

In [6]:
# Fight
# isin() flags the ser1 entries that also occur in ser2; ~ inverts that
# boolean mask so that only the entries absent from ser2 are kept.

ser1 = ser1[~ser1.isin(ser2)]

In [7]:
# Output: display ser1 after the filtering done in the Fight cell

ser1

0    1
1    2
2    3
dtype: int64

---

### Kombat 3: How to bin a numeric series to 10 groups of equal size?

Bin the series ser into 10 equal deciles and replace the values with the bin name.

In [8]:
# Input: 20 random floats drawn from [0, 1); unseeded, so values change per run

uniform_draws = np.random.random(size=20)
ser = pd.Series(uniform_draws)

In [9]:
# Fight
# Cut at every 10th percentile and label each of the ten resulting bins.

decile_edges = [0, .1, .2, .3, .4, .5, .6, .7, .8, .9, 1]
decile_names = ['1st', '2nd', '3rd', '4th', '5th', '6th', '7th', '8th', '9th', '10th']
rank = pd.qcut(ser, q=decile_edges, labels=decile_names)

In [10]:
# Output: the raw values followed by the decile label assigned to each

print(ser, rank, sep='\n')

0     0.946921
1     0.131056
2     0.050999
3     0.259843
4     0.445200
5     0.881474
6     0.177951
7     0.810994
8     0.338666
9     0.452999
10    0.429994
11    0.982426
12    0.195363
13    0.931526
14    0.278215
15    0.642476
16    0.814131
17    0.490758
18    0.868886
19    0.788702
dtype: float64
0     10th
1      1st
2      1st
3      3rd
4      5th
5      9th
6      2nd
7      7th
8      4th
9      5th
10     4th
11    10th
12     2nd
13     9th
14     3rd
15     6th
16     8th
17     6th
18     8th
19     7th
dtype: category
Categories (10, object): ['1st' < '2nd' < '3rd' < '4th' ... '7th' < '8th' < '9th' < '10th']


---

### Kombat 4: How to convert a numpy array to a dataframe of given shape?

Reshape the series ser into a dataframe with 7 rows and 5 columns.

In [11]:
# Input: 35 random integers from 1 to 9 (the upper bound 10 is exclusive)

random_digits = np.random.randint(low=1, high=10, size=35)
ser = pd.Series(random_digits)

In [12]:
# Fight
# Reshape the underlying array (35 values = 7 * 5) and wrap it in a DataFrame.

target_shape = (7, 5)
serpd = pd.DataFrame(data=ser.values.reshape(target_shape))

In [13]:
# Output: display the 7 x 5 DataFrame built in the Fight cell

serpd

Unnamed: 0,0,1,2,3,4
0,2,2,4,1,7
1,7,8,7,6,1
2,5,6,7,2,1
3,3,4,4,6,6
4,8,1,9,3,2
5,9,2,4,5,8
6,9,5,7,9,4


---

### Kombat 5: How to create a TimeSeries of 10 weekends (Sundays) starting on ‘2022-01-02’, having random numbers as values?

In [14]:
# Input: 10 random integers from 1 to 9 (the upper bound 10 is exclusive)

random_values = np.random.randint(low=1, high=10, size=10)
time_series = pd.Series(random_values)

In [15]:
# Fight
# 'W-SUN' is a weekly frequency anchored on Sundays, so ten periods from
# 2022-01-02 give ten consecutive Sundays. Fresh random values are drawn here
# rather than reusing the Series from the Input cell.

random_values = np.random.randint(1, 10, 10)
sundays = pd.date_range(start='2022-01-02', periods=10, freq='W-SUN')
time_series = pd.Series(random_values, index=sundays)

In [16]:
# Output: display the Sunday-indexed Series built in the Fight cell

time_series

2022-01-02    6
2022-01-09    1
2022-01-16    9
2022-01-23    3
2022-01-30    3
2022-02-06    4
2022-02-13    7
2022-02-20    5
2022-02-27    1
2022-03-06    9
Freq: W-SUN, dtype: int32

---

### Kombat 6: How to change column values when importing csv to a dataframe?

Import the Boston housing dataset, but while importing change the 'medv' (median house value) column so that values < 25 become ‘Low’ and values > 25 become ‘High’.

In [17]:
# Input: raw GitHub URL of the BostonHousing.csv file to import

url = 'https://raw.githubusercontent.com/selva86/datasets/master/BostonHousing.csv'

In [18]:
# Fight
# The converter is applied to the 'medv' column while the CSV is parsed.

def _medv_label(raw_value):
    """Return 'Low' if the medv value is below 25, otherwise 'High' (so exactly 25 is 'High')."""
    return 'Low' if float(raw_value) < 25 else 'High'

df = pd.read_csv(url, converters={'medv': _medv_label})

In [19]:
# Output: display the imported DataFrame, with 'medv' recoded to 'Low'/'High'

df

Unnamed: 0,crim,zn,indus,chas,nox,rm,age,dis,rad,tax,ptratio,b,lstat,medv
0,0.00632,18.0,2.31,0,0.538,6.575,65.2,4.0900,1,296,15.3,396.90,4.98,Low
1,0.02731,0.0,7.07,0,0.469,6.421,78.9,4.9671,2,242,17.8,396.90,9.14,Low
2,0.02729,0.0,7.07,0,0.469,7.185,61.1,4.9671,2,242,17.8,392.83,4.03,High
3,0.03237,0.0,2.18,0,0.458,6.998,45.8,6.0622,3,222,18.7,394.63,2.94,High
4,0.06905,0.0,2.18,0,0.458,7.147,54.2,6.0622,3,222,18.7,396.90,5.33,High
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
501,0.06263,0.0,11.93,0,0.573,6.593,69.1,2.4786,1,273,21.0,391.99,9.67,Low
502,0.04527,0.0,11.93,0,0.573,6.120,76.7,2.2875,1,273,21.0,396.90,9.08,Low
503,0.06076,0.0,11.93,0,0.573,6.976,91.0,2.1675,1,273,21.0,396.90,5.64,Low
504,0.10959,0.0,11.93,0,0.573,6.794,89.3,2.3889,1,273,21.0,393.45,6.48,Low


---

### Kombat 7: How to get the nrows, ncolumns, datatype, summary stats of each column of a dataframe?

Get the number of rows, columns, datatype and summary statistics of each column of the Cars93 dataset.

In [20]:
# Input: raw GitHub URL of the Cars93_miss.csv file to load

url = 'https://raw.githubusercontent.com/selva86/datasets/master/Cars93_miss.csv'

In [21]:
# Fight
# df.shape is (rows, columns), so one unpack yields both counts.

df = pd.read_csv(url)
numberofrows, numberofcols = df.shape
datatype = df.dtypes
summarystats = df.describe()

In [22]:
# Output: column count, row count, per-column dtypes, then summary statistics

print(numberofcols, numberofrows, datatype, summarystats, sep='\n')

27
93
Manufacturer           object
Model                  object
Type                   object
Min.Price             float64
Price                 float64
Max.Price             float64
MPG.city              float64
MPG.highway           float64
AirBags                object
DriveTrain             object
Cylinders              object
EngineSize            float64
Horsepower            float64
RPM                   float64
Rev.per.mile          float64
Man.trans.avail        object
Fuel.tank.capacity    float64
Passengers            float64
Length                float64
Wheelbase             float64
Width                 float64
Turn.circle           float64
Rear.seat.room        float64
Luggage.room          float64
Weight                float64
Origin                 object
Make                   object
dtype: object
       Min.Price      Price  Max.Price   MPG.city  MPG.highway  EngineSize  \
count  86.000000  91.000000  88.000000  84.000000    91.000000   91.000000   
mean   17.1186

---

### Kombat 8: How to slice a DataFrame by column value?

Get every Chevrolet car with an EngineSize lower than 3.0 from the Cars93 dataset.

In [23]:
# Input: raw GitHub URL of the Cars93_miss.csv file (same file as in Kombat 7)

url = 'https://raw.githubusercontent.com/selva86/datasets/master/Cars93_miss.csv'

In [24]:
# Fight
# Build one boolean mask per condition, then keep the rows where both hold.

df = pd.read_csv(url)
is_chevrolet = df['Manufacturer'] == 'Chevrolet'
has_small_engine = df['EngineSize'] < 3
chevy = df.loc[is_chevrolet & has_small_engine]

In [25]:
# Output: display the rows selected in the Fight cell (Chevrolet, EngineSize < 3)

chevy

Unnamed: 0,Manufacturer,Model,Type,Min.Price,Price,Max.Price,MPG.city,MPG.highway,AirBags,DriveTrain,...,Passengers,Length,Wheelbase,Width,Turn.circle,Rear.seat.room,Luggage.room,Weight,Origin,Make
11,Chevrolet,Cavalier,Compact,8.5,13.4,18.3,25.0,36.0,,,...,5.0,182.0,101.0,66.0,38.0,25.0,13.0,2490.0,USA,Chevrolet Cavalier
12,Chevrolet,Corsica,Compact,11.4,11.4,11.4,25.0,34.0,Driver only,Front,...,5.0,184.0,103.0,68.0,39.0,26.0,,2785.0,USA,Chevrolet Corsica
14,Chevrolet,Lumina,Midsize,13.4,15.9,18.4,,29.0,,Front,...,6.0,,108.0,71.0,,28.5,16.0,3195.0,USA,Chevrolet Lumina


---

# FINISH HIM!!!