### Working with Series

In [1]:
import pandas as pd

### Series
- Represents a one dimensional labeled array or single column of data.
- Data types should be consistent since it creates a column essentially.
- To create a Series, call its constructor using dot notation: `pd.Series(...)`.

#### Creating a Series object from a list.

In [2]:
# A Series holding strings, built from a plain Python list
ice_cream = [
    'chocolate',
    'vanilla',
    'strawberry',
    'rum raisin',
]

pd.Series(data=ice_cream)

0     chocolate
1       vanilla
2    strawberry
3    rum raisin
dtype: object

In [3]:
# A Series of ints built with the same constructor (booleans - True/False - work too)
lottery = [
    1, 5, 19, 51,
    7, 43, 12,
]

pd.Series(data=lottery)

0     1
1     5
2    19
3    51
4     7
5    43
6    12
dtype: int64

#### Creating a Series object from a dictionary.
- Pandas will take the dictionary keys and automatically use them as the index labels.

In [5]:
# The dictionary keys become the index labels and the values become the data
webster = {
    'Ardvark': 'Animal',
    'Banana': 'Fruit',
    'Cyan': 'Color',
}

pd.Series(data=webster)

Ardvark    Animal
Banana      Fruit
Cyan        Color
dtype: object

#### Attributes on a Series object
- Objects in Python have attributes and methods.
- A Series is an object and therefore has many attributes and methods: attributes do not modify the object, they only provide information about it, whereas methods apply a calculation or perform a task, such as manipulating a string.

In [6]:
# keep the list in a variable so it can be reused
s_char = [
    'Tall',
    'Smart',
    'Charming',
    'Humble',
    'Social',
]

# build the Series from the list and display it
s = pd.Series(data=s_char)
s

0        Tall
1       Smart
2    Charming
3      Humble
4      Social
dtype: object

#### Access the attributes - press tab after the dot to see the methods and attributes
- Attributes do NOT require ( )
- Methods do require ( )
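- .columns: shows column names (DataFrame only; a Series is a single column and has no .columns)
- .shape: shows row and column counts (for a Series, just the row count)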

In [7]:
# .values is an attribute (no parentheses) - returns the underlying data as a NumPy array
s.values

array(['Tall', 'Smart', 'Charming', 'Humble', 'Social'], dtype=object)

In [8]:
# .index - returns the index object; the default is a RangeIndex showing start, stop and step (step=1 is the increment)
s.index

RangeIndex(start=0, stop=5, step=1)

In [11]:
# .dtype - returns the data type of the values; 'O' stands for object (used here for the strings)
s.dtype

dtype('O')

#### Methods on a Series object
- Pressing tab after the dot will reveal a list of methods and attributes.

In [12]:
# a new Series of float prices, kept in p for the method examples
prices = [2.99, 4.45, 3.36]
p = pd.Series(data=prices)

# display it
p

0    2.99
1    4.45
2    3.36
dtype: float64

In [13]:
# call the sum method on the p series - adds up all of the prices
p.sum()

10.8

In [14]:
# call the product method on the p series - multiplies all of the values together
p.product()

44.706480000000006

In [15]:
# call the mean method for the average of the series - the sum divided by the number of values
p.mean()

3.6

In [16]:
# call the median method - the middle value once the prices are put in order
p.median()

3.36

In [17]:
# call the std method - standard deviation, a measure of how spread out the values are
# (pandas computes the sample standard deviation, ddof=1, by default)
p.std()

0.759012516365837

#### Parameters and arguments
- for methods
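- A parameter is the name a method gives to one of its inputs, and the argument is the value we choose to pass for it. Example: in .sort_values(by="colname", ascending=False), `by` and `ascending` are parameters, while "colname" and False are the arguments.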
- Indexes don't have to be unique.

In [19]:
# two lists: the fruits are the data and the weekdays become the index labels
weekdays = [
    'Monday',
    'Tuesday',
    'Wednesday',
    'Thursday',
    'Friday',
]
fruits = [
    'apple',
    'orange',
    'grape',
    'plum',
    'blueberry',
]

# positional form: the data goes first and the index second
pd.Series(fruits, weekdays)

# keyword form: naming each parameter explicitly lets you pass the arguments in any order
pd.Series(index=weekdays, data=fruits)

Monday           apple
Tuesday         orange
Wednesday        grape
Thursday          plum
Friday       blueberry
dtype: object

#### Create a Series from a dataset with pd.read_csv( ) function

In [36]:
# import the data - read_csv loads the file into a DataFrame
# NOTE(review): p is rebound here; it previously held the prices Series
p = pd.read_csv('../datasets/pokemon.csv')

# preview the first 5 rows
p.head()

Unnamed: 0,Pokemon,Type
0,Bulbasaur,Grass
1,Ivysaur,Grass
2,Venusaur,Grass
3,Charmander,Fire
4,Charmeleon,Fire


In [39]:
# pull one col to create a series by using the parameter usecols and the argument for the col
# read_csv returns a one-column DataFrame here; .squeeze('columns') turns it into a Series
# (the read_csv squeeze=True keyword was deprecated in pandas 1.4 and removed in 2.0)
p = pd.read_csv('../datasets/pokemon.csv', usecols=['Pokemon']).squeeze('columns')

# preview the first 5 values
p.head()

0     Bulbasaur
1       Ivysaur
2      Venusaur
3    Charmander
4    Charmeleon
Name: Pokemon, dtype: object

In [44]:
# get the counts - .shape is an attribute; for a Series it is a one-element tuple with the row count (721 rows)
p.shape

(721,)

#### Head and Tail methods
- head( ) - returns the first 5 rows by default
- tail( ) - returns the last 5 rows by default

In [53]:
# read only the 'Stock Price' column; read_csv returns a one-column DataFrame and
# .squeeze('columns') turns it into a Series
# (the read_csv squeeze=True keyword was deprecated in pandas 1.4 and removed in 2.0)
stock = pd.read_csv('../datasets/google_stock_price.csv', usecols=['Stock Price']).squeeze('columns')

# preview the 1st 5 rows - change the row num by entering it in the ( )
stock.head()

0    50.12
1    54.10
2    54.65
3    52.38
4    52.95
Name: Stock Price, dtype: float64

In [54]:
# preview the last 5 rows - pass a number in the ( ) to change how many
stock.tail()

3007    772.88
3008    771.07
3009    773.18
3010    771.61
3011    782.22
Name: Stock Price, dtype: float64

In [55]:
# get the average stock price
stock.mean()

334.31009296148744

#### Passing objects into built in functions
- use Pokemon and Stock series to demonstrate functions.

In [58]:
# get the number of rows from pokemon and stock with the built-in len()
# NOTE(review): a cell only displays the value of its last expression, so only len(stock) is shown
len(p)
len(stock)

3012

In [59]:
# get the type - the built-in type() reports the class of the object
type(stock)

pandas.core.series.Series

In [67]:
# return a sorted list - works the same for numeric values, in ascending order
# sorted() returns a plain Python list, not a Series
sortp = sorted(p)

sortp

['Abomasnow',
 'Abra',
 'Absol',
 'Accelgor',
 'Aegislash',
 'Aerodactyl',
 'Aggron',
 'Aipom',
 'Alakazam',
 'Alomomola',
 'Altaria',
 'Amaura',
 'Ambipom',
 'Amoonguss',
 'Ampharos',
 'Anorith',
 'Arbok',
 'Arcanine',
 'Arceus',
 'Archen',
 'Archeops',
 'Ariados',
 'Armaldo',
 'Aromatisse',
 'Aron',
 'Articuno',
 'Audino',
 'Aurorus',
 'Avalugg',
 'Axew',
 'Azelf',
 'Azumarill',
 'Azurill',
 'Bagon',
 'Baltoy',
 'Banette',
 'Barbaracle',
 'Barboach',
 'Basculin',
 'Bastiodon',
 'Bayleef',
 'Beartic',
 'Beautifly',
 'Beedrill',
 'Beheeyem',
 'Beldum',
 'Bellossom',
 'Bellsprout',
 'Bergmite',
 'Bibarel',
 'Bidoof',
 'Binacle',
 'Bisharp',
 'Blastoise',
 'Blaziken',
 'Blissey',
 'Blitzle',
 'Boldore',
 'Bonsly',
 'Bouffalant',
 'Braixen',
 'Braviary',
 'Breloom',
 'Bronzong',
 'Bronzor',
 'Budew',
 'Buizel',
 'Bulbasaur',
 'Buneary',
 'Bunnelby',
 'Burmy',
 'Butterfree',
 'Cacnea',
 'Cacturne',
 'Camerupt',
 'Carbink',
 'Carnivine',
 'Carracosta',
 'Carvanha',
 'Cascoon',
 'Castform',


In [70]:
# sortp is already a list, so list() just makes a copy of it - slice off the first 5 items
list(sortp)[:5]

['Abomasnow', 'Abra', 'Absol', 'Accelgor', 'Aegislash']

In [74]:
# use the built-in dict function to turn google stock into a dict - the index labels become the keys
# NOTE(review): this rebinds s, which held the characteristics Series earlier in the notebook
s = dict(stock)

s

{0: 50.12,
 1: 54.1,
 2: 54.65,
 3: 52.38,
 4: 52.95,
 5: 53.9,
 6: 53.02,
 7: 50.95,
 8: 51.13,
 9: 50.07,
 10: 50.7,
 11: 49.95,
 12: 50.74,
 13: 51.1,
 14: 51.1,
 15: 52.61,
 16: 53.7,
 17: 55.69,
 18: 55.94,
 19: 56.93,
 20: 58.69,
 21: 59.62,
 22: 58.86,
 23: 59.13,
 24: 60.35,
 25: 59.86,
 26: 59.07,
 27: 63.37,
 28: 65.47,
 29: 64.74,
 30: 66.22,
 31: 67.46,
 32: 69.12,
 33: 68.47,
 34: 69.36,
 35: 68.8,
 36: 67.56,
 37: 68.63,
 38: 70.38,
 39: 70.93,
 40: 71.98,
 41: 74.51,
 42: 73.9,
 43: 70.17,
 44: 74.62,
 45: 86.13,
 46: 93.61,
 47: 90.81,
 48: 92.89,
 49: 96.55,
 50: 95.22,
 51: 97.92,
 52: 97.34,
 53: 95.74,
 54: 92.26,
 55: 84.59,
 56: 86.19,
 57: 84.27,
 58: 83.85,
 59: 91.42,
 60: 90.91,
 61: 92.34,
 62: 86.19,
 63: 86.16,
 64: 83.69,
 65: 84.62,
 66: 82.47,
 67: 83.68,
 68: 87.29,
 69: 89.61,
 70: 90.43,
 71: 90.9,
 72: 89.89,
 73: 89.61,
 74: 90.11,
 75: 88.06,
 76: 85.63,
 77: 84.91,
 78: 86.63,
 79: 85.74,
 80: 85.14,
 81: 89.26,
 82: 89.8,
 83: 88.15,
 84: 89.95,


In [76]:
# get the min/max from google - max returns the largest value, min returns the smallest value
# NOTE(review): a cell only displays its last expression, so only max(stock) is shown
min(stock)

max(stock)

782.22

#### Sort_values( )
- Default is ascending order
- To change default, use ascending = False
- A Series or DataFrame isn't modified permanently unless you reassign the result or use the parameter inplace=True.

In [77]:
# re-import both series so they start from their original order
# .squeeze('columns') turns each one-column DataFrame into a Series
# (the read_csv squeeze=True keyword was deprecated in pandas 1.4 and removed in 2.0)
stock = pd.read_csv('../datasets/google_stock_price.csv', usecols=['Stock Price']).squeeze('columns')
p = pd.read_csv('../datasets/pokemon.csv', usecols=['Pokemon']).squeeze('columns')

In [84]:
# sort the values in ascending order - strings will be alphabetical
# we can method chain here: sort first, then take the head of the result
p.sort_values().head()

# make descending order
# NOTE(review): a cell only displays its last expression, so only this descending result is shown
p.sort_values(ascending=False).head()

717     Zygarde
633    Zweilous
40        Zubat
569       Zorua
570     Zoroark
Name: Pokemon, dtype: object

In [87]:
# sort a numeric series in descending order and get the top 3 with head(3)
stock.sort_values(ascending=False).head(3)

3011    782.22
2859    776.60
3009    773.18
Name: Stock Price, dtype: float64

In [88]:
# the inplace parameter is one way to overwrite the original object
# displaying stock again shows it is not sorted as it is above, because the sorted result wasn't saved or reassigned
stock

0        50.12
1        54.10
2        54.65
3        52.38
4        52.95
         ...  
3007    772.88
3008    771.07
3009    773.18
3010    771.61
3011    782.22
Name: Stock Price, Length: 3012, dtype: float64

In [89]:
# modify stock to be in descending order permanently - inplace=True sorts the object itself and returns None
stock.sort_values(ascending=False, inplace=True)

In [90]:
# now test it - stock is in descending order now.
stock

3011    782.22
2859    776.60
3009    773.18
3007    772.88
3010    771.61
         ...  
12       50.74
10       50.70
0        50.12
9        50.07
11       49.95
Name: Stock Price, Length: 3012, dtype: float64

#### Sort_index

In [None]:
# re-import the pokemon column as a Series; .squeeze('columns') replaces the
# read_csv squeeze=True keyword (deprecated in pandas 1.4, removed in 2.0)
p = pd.read_csv('../datasets/pokemon.csv', usecols=['Pokemon']).squeeze('columns')

In [91]:
# sorting by value in place carries each index label along with its value,
# so the index is now out of order - look at the 1st position, it's 717!
p.sort_values(ascending=False, inplace=True)

p.head()

717     Zygarde
633    Zweilous
40        Zubat
569       Zorua
570     Zoroark
Name: Pokemon, dtype: object

In [92]:
# sort by the index labels to put the rows back in their original order
# (this re-orders by the existing index; it does not create a new one)
p.sort_index(inplace=True)
p.head()

0     Bulbasaur
1       Ivysaur
2      Venusaur
3    Charmander
4    Charmeleon
Name: Pokemon, dtype: object

#### The "in" keyword
- used to check whether a value exists; applied directly to a Series it checks the index labels, so use .values to check the data itself

In [93]:
# re-import both series so they start from their original order
# .squeeze('columns') turns each one-column DataFrame into a Series
# (the read_csv squeeze=True keyword was deprecated in pandas 1.4 and removed in 2.0)
stock = pd.read_csv('../datasets/google_stock_price.csv', usecols=['Stock Price']).squeeze('columns')
p = pd.read_csv('../datasets/pokemon.csv', usecols=['Pokemon']).squeeze('columns')

In [98]:
# the in keyword gives back True or False depending on whether the value exists in the list
# Example: the whole numbers 1 through 6
nums = list(range(1, 7))

2 in nums

True

In [99]:
# `in` on a Series itself checks the index labels, so use the .values attribute to search the data
'Charmander' in p.values

True

#### Extract by index position or label

#### Math methods

#### Value_counts method

#### Map method