## Pandas Series and Data Frames

In [2]:
import pandas as pd
import numpy as np

Create a numpy array

In [3]:
# Draw 4 values from the standard normal distribution.
# A seeded Generator makes the draw reproducible, so the values are the
# same every time the notebook is restarted and re-run.
rng = np.random.default_rng(42)
arr = rng.standard_normal(4)
print(type(arr))
# "\n" (backslash) adds a blank line after the array; "/n" would be printed literally
print(arr, "\n")

# A pandas Series made from the array above
s = pd.Series(arr)
print(type(s))
print(s)

<class 'numpy.ndarray'>
[ 1.10577579 -0.59820492  0.00408366  0.56335478] /n
<class 'pandas.core.series.Series'>
0    1.105776
1   -0.598205
2    0.004084
3    0.563355
dtype: float64


Notice that the index is included in a pandas Series. Although the array can be indexed, the index is not part of its data structure.

In [4]:
# Build a pandas Series from a NumPy array, replacing the default
# 0..n-1 index with explicit labels
pd.Series(
    data=np.arange(3),
    index=[2023, 2024, 2025],
)


2023    0
2024    1
2025    2
dtype: int64

In [5]:
# Build a Series from a list of strings; no index is given,
# so pandas assigns the default integer index
pd.Series(
    data=['EDS 220', 'EDS 222', 'EDS 223', 'EDS 242'],
)

0    EDS 220
1    EDS 222
2    EDS 223
3    EDS 242
dtype: object

In [14]:
# Create a pandas.Series from a dictionary

# Construct a dictionary of key-value pairs (the object `d` itself has type `dict`)
d = {'key_0': 2, 'key_1': 3, 'key_2': 5}

# Initialize the series once: the dictionary keys become the index
# and the dictionary values become the data
s = pd.Series(d)
print(s)

# The values are all integers, so the series has an integer dtype
print(s.dtype)

# Notice, if any '' are included around the numbers in the value portion
# of the dictionary above, the data type of the series will be 'object'

key_0    2
key_1    3
key_2    5
dtype: int64
int64


In [15]:
# Build a Series from one scalar: the value is repeated for every index label
pd.Series(
    data=3.0,
    index=['A', 'B', 'C'],
)

A    3.0
B    3.0
C    3.0
dtype: float64

# *Simple Operations*

In [23]:
# Define a series of integers labelled by name
z = pd.Series({'Arnold': 98, 'Barney': 73, 'Crampston': 65})

# Element-wise division: every value is divided by 10
print(z.div(10), '\n')

# Element-wise exponential (e^n) of every value
print(np.exp(z), '\n')

# Neither operation modified the original series
print(z)

Arnold       9.8
Barney       7.3
Crampston    6.5
dtype: float64 

Arnold       3.637971e+42
Barney       5.052394e+31
Crampston    1.694889e+28
dtype: float64 

Arnold       98
Barney       73
Crampston    65
dtype: int64


In [27]:
# Comparing a series against a value yields a new series of True/False
# values: True where the element satisfies the condition, False otherwise

# Strictly greater than 70
print(z.gt(70))

# Strictly less than 70
print(z.lt(70))

# Boolean series like these are useful when selecting data from data frames

Arnold        True
Barney        True
Crampston    False
dtype: bool
Arnold       False
Barney       False
Crampston     True
dtype: bool


# *Identifying missing values*

In [28]:
# A series that contains missing values (np.nan) at positions 2 and 4
na_s = pd.Series(
    data=[1, 2, np.nan, 4, np.nan],
)

na_s

0    1.0
1    2.0
2    NaN
3    4.0
4    NaN
dtype: float64

In [30]:
# Check whether the series contains any missing values (NaN) with the hasnans attribute
na_s.hasnans

True

In [31]:
# Get a boolean Series marking which elements are missing with the isna() method
na_s.isna()

0    False
1    False
2     True
3    False
4     True
dtype: bool

# *Creating a `pandas.DataFrame`*

In [34]:
# Map each column name to the Series holding that column's data
d = dict(
    col_name_1=pd.Series(np.arange(3)),
    col_name_2=pd.Series([3.1, 3.2, 3.3]),
)

# Build the data frame: dictionary keys become the column names
df = pd.DataFrame(data=d)
df

Unnamed: 0,col_name_1,col_name_2
0,0,3.1
1,1,3.2
2,2,3.3


In [35]:
# Relabel the rows: replace the existing index with 'a', 'b', 'c'
df.index = pd.Index(['a', 'b', 'c'])
df

Unnamed: 0,col_name_1,col_name_2
a,0,3.1
b,1,3.2
c,2,3.3
