# Pandas core data structures: Series

## Creating a `pandas.Series`

- A `Series` is a one-dimensional data structure
- It has values and an index
- If you don't specify an index, pandas creates one automatically (0, 1, 2, ...)

In [None]:
"creating a series using a simple Python list"

import pandas as pd

# Source data: a plain Python list. No index is supplied, so pandas
# generates the default integer index (0, 1, 2).
data_list = [1, 2, 3]

print("Creating a series using a simple Python list")
data_series = pd.Series(data=data_list)

# Print the Series that was just built (values plus auto-generated index),
# not the list it was built from.
print(data_series)

In [None]:
"creating a series with a list and custom index"

import pandas as pd

# Pair each score with a student name so values can be looked up by label
# instead of by the default positional index.
scores = pd.Series(
    [88, 92, 79, 95],
    index=["Alice", "Bob", "Charlie", "Diana"],
    name="scores",
)

# Heading and Series in a single call, separated by a newline.
print("Creating a series with custom index", scores, sep="\n")


In [None]:
"Creating a pandas series using a Python dictionary. Dictionary keys become the index."

# Mapping of student name -> score; the keys end up as the index labels.
grade_dict = dict(Alice=88, Bob=92, Charlie=79, Diana=95)

scores_from_dict = pd.Series(data=grade_dict)

# Heading (with its surrounding blank lines) followed by the Series.
print("\nCreating a Series from a dictionary:\n", scores_from_dict, sep="\n")


In [None]:
"Creating a pandas series using a 1-d numpy array"
import numpy as np

# One-dimensional array of strings; no index is passed, so the Series
# gets the default integer index.
color_array = np.array(["red", "blue", "green"])

colors_from_array = pd.Series(color_array)

print("\nCreating a Series from a numpy array:\n", colors_from_array, sep="\n")


## Inspecting a series
Key attributes: `.values`, `.index`, `.dtype`, `.shape`, and `.name`.

In [None]:
import pandas as pd

# Sample data for the inspection cells below: four exam scores labelled
# by student name, with a descriptive Series name.
scores = pd.Series(
    [88, 92, 79, 95],
    index=["Alice", "Bob", "Charlie", "Diana"],
    name="Exam Scores",
)

print("Inspecting the Series:\n", scores, sep="\n")

In [None]:
# Show the data held by the Series, without its index labels.
print(
    "\nInspecting .values (underlying data stored in the Series):",
    "scores.values = " + str(scores.values),
    sep="\n",
)

In [None]:
# Show the index object that holds the label for each value.
print(
    "\nInspecting .index (labels associated with each value):",
    "scores.index = " + str(scores.index),
    sep="\n",
)

In [None]:
# Show the data type shared by the values of the Series.
print(
    "\nInspecting .dtype (data type of the values):",
    "scores.dtype = " + str(scores.dtype),
    sep="\n",
)

In [None]:
# Show the shape tuple of the Series.
print(
    "\nInspecting .shape (dimensions of the Series):",
    "scores.shape = " + str(scores.shape),
    sep="\n",
)

In [None]:
# Show the optional name attached to the Series.
print(
    "\nInspecting .name (optional name of the Series):",
    "scores.name = " + str(scores.name),
    sep="\n",
)


## Accessing data

- By label: `.loc`
- By position: `.iloc`
- By Boolean condition

In [None]:
"Access by label: scores['Alice'] is equivalent to score.loc['Alice']"

# Label-based lookup. The printed expressions name the variable actually
# being indexed (`scores`), so the output can be copied and run as-is.
print("Accessing data via label:\n")
print('scores["Bob"] = ', scores["Bob"])
print('scores.loc["Alice"] = ', scores.loc["Alice"])

In [None]:
"Access by position"

# .iloc looks up by zero-based integer position, so 0 is the first entry.
print("\nAccessing data via position:\n")
print("scores.iloc[0] =  " + str(scores.iloc[0]))


In [None]:
"Accessing multiple elements"

# Passing a list of labels selects several entries at once and returns a Series.
print("\nAccessing multiple values by label:\n")
print('scores[["Alice", "Diana"]] =\n ' + str(scores[["Alice", "Diana"]]))

# Same idea with a list of integer positions via .iloc.
print("\nAccessing multiple values by position:\n")
print("scores.iloc[[0, 3]] =\n " + str(scores.iloc[[0, 3]]))


In [None]:
"Boolean indexing (filtering)"

# The comparison yields a Boolean Series (True where the score is at least 90).
print("\nBoolean filtering:\n")
print("scores >= 90:\n " + str(scores >= 90))

In [None]:
# Indexing with the Boolean mask keeps only the entries where it is True.
print("\nScores greater than or equal to 90:\n", scores[scores >= 90], sep="\n")



## Vectorized operations
No loops. Operations apply element-wise automatically.

In [None]:
"Arithmetic operations"

import pandas as pd

# Fresh, unnamed scores Series for the arithmetic examples.
scores = pd.Series(
    data=[88, 92, 79, 95],
    index=["Alice", "Bob", "Charlie", "Diana"],
)

print("Original Series:\n", scores, sep="\n")

# A scalar operand is applied to every element; no explicit loop is written.
print(
    "\nAdding 5 points to each score (vectorized operation):",
    "scores + 5 =\n",
    scores + 5,
    sep="\n",
)

print(
    "\nMultiplying each score by 1.1 (vectorized operation):",
    "scores * 1.1 =\n",
    scores * 1.1,
    sep="\n",
)


In [None]:
"Comparison operations (returns Boolean Series). Vectorized comparisons return a Boolean Series of the same shape."

# Element-wise comparison against a scalar; the result has one Boolean per score.
print(
    "\nComparing each value to a condition (scores >= 90):",
    "scores >= 90 =\n",
    scores >= 90,
    sep="\n",
)


In [None]:
"Operations between two Series (automatic index alignment). Pandas aligns values by index labels, not by position."

# Bonus points keyed by student name. "Eve" is not among the labels used
# for scores in this section, so the two indexes only partly overlap.
bonus = pd.Series(data=[5, 10, 0], index=["Alice", "Bob", "Eve"])

print("\nFirst Series (scores):\n", scores, sep="\n")
print("\nSecond Series (bonus points):\n", bonus, sep="\n")

# Addition pairs entries by label rather than by position.
print(
    "\nAdding two Series together (index alignment happens automatically):",
    "scores + bonus =\n",
    scores + bonus,
    sep="\n",
)


In [None]:
"Handling missing values in vectorized operations"

# Plain `+` leaves NaN for any label that exists in only one operand.
print(
    "\nHandling missing values in vectorized operations:",
    "Notice NaN where data is missing.\n",
    scores + bonus,
    sep="\n",
)

# Series.add with fill_value=0 substitutes 0 for the side that lacks a label.
print(
    "\nWe can use Series.add with fill_value=0",
    scores.add(bonus, fill_value=0),
    sep="\n",
)


## Vectorized aggregation functions



In [None]:
print("\nVectorized aggregation functions:")

# Each call below reduces the whole Series to a single number.
print("\nMean of scores:", "scores.mean() = " + str(scores.mean()), sep="\n")

print("\nMaximum score:", "scores.max() = " + str(scores.max()), sep="\n")

print(
    "\nStandard deviation of scores:",
    "scores.std() = " + str(scores.std()),
    sep="\n",
)


In [None]:
"Applying NumPy-style functions"

import numpy as np

# A NumPy function can be called directly on the Series; the natural log
# is taken of every score.
print(
    "\nApplying NumPy functions to a Series (still vectorized):",
    "np.log(scores) =\n",
    np.log(scores),
    sep="\n",
)


## Common Series operations


In [None]:
"Setup & Example Series"

import pandas as pd

# Text Series with repeated entries, for the distinct-value and frequency examples.
cities = pd.Series(
    data=["NYC", "LA", "NYC", "Chicago", "LA", "NYC", "Boston"],
    name="City",
)

# Numeric Series (92 appears twice), for the aggregation and sorting examples.
scores = pd.Series(data=[88, 92, 79, 95, 92], name="Scores")

print("Categorical Series:\n", cities, sep="\n")
print("\nNumeric Series:\n", scores, sep="\n")


In [None]:
"Previewing Data: .head() and .tail()"

# head(n) / tail(n) return the first / last n entries as a new Series.
print("Previewing the first 3 values using .head():")
print("cities.head(3) =\n " + str(cities.head(3)))

print("\nPreviewing the last 3 values using .tail():")
print("cities.tail(3) =\n " + str(cities.tail(3)))


In [None]:
"Aggregation Methods (Numeric Series). note: .count() ignores missing values, .size does not."

print("Aggregation methods on numeric Series:\n")

# Labels are padded so the "=" signs line up in the output.
print("scores.sum()      = " + str(scores.sum()))
print("scores.mean()     = " + str(scores.mean()))
print("scores.min()      = " + str(scores.min()))
print("scores.max()      = " + str(scores.max()))
print("scores.median()   = " + str(scores.median()))
print("scores.std()      = " + str(scores.std()))
# count() is a method; size is an attribute, hence no parentheses.
print("scores.count()    = " + str(scores.count()))
print("scores.size       = " + str(scores.size))

In [None]:
"Summary Statistics of the series: .describe()"

# describe() is called on both Series so the two summaries can be compared.
print("Summary statistics of numeric Series:\n", scores.describe(), sep="\n")

print("\nSummary statistics of categorical Series:\n", cities.describe(), sep="\n")

In [None]:
"Check Distinct Values: .unique() and .nunique()"
# unique() lists the distinct values; nunique() gives how many there are.
print("Finding distinct values using .unique():")
print("cities.unique() = " + str(cities.unique()))

print("\nCounting distinct values using .nunique():")
print("cities.nunique() = " + str(cities.nunique()))


In [None]:
"""
Frequency Tables: .value_counts()
normalize=True returns proportions instead of counts.
"""

# Occurrences of each city.
print("Frequency table using .value_counts():\n", cities.value_counts(), sep="\n")

# Same table expressed as proportions rather than counts.
print(
    "\nNormalized frequency (proportions):\n",
    cities.value_counts(normalize=True),
    sep="\n",
)


In [None]:
"Sorting Values and Index"

# sort_values() orders the entries by their values.
print(
    "Sorting values in numeric Series:\n",
    "scores.sort_values() =\n",
    scores.sort_values(),
    sep="\n",
)

# sort_index() orders the entries by their index labels.
print(
    "\nSorting by index:\n",
    "scores.sort_index() =\n",
    scores.sort_index(),
    sep="\n",
)


## Inspecting and handling missing data

In [None]:
import pandas as pd

# Charlie's score is deliberately missing. pandas stores the None as NaN,
# which is what the printed Series shows.
scores = pd.Series(
    [88, 92, None, 95],
    index=["Alice", "Bob", "Charlie", "Diana"],
    name="Exam Scores"
)

print("Original Series with missing values:\n")
print(scores)

# isna() flags each entry as missing (True) or present (False).
print("\nDetecting missing values using .isna():")
print(scores.isna())

# Summing the Boolean mask counts the True entries, i.e. the missing values.
print("\nYou can count missing values using .isna().sum():")
print("The total number of missing values is:", scores.isna().sum())

# notna() is the inverse mask of isna().
print("\nDetecting non-missing values using .notna():")
print(scores.notna())

# dropna() returns a new Series without the missing entries.
print("\nDropping missing values using .dropna():")
print(scores.dropna())

# fillna() returns a new Series with missing entries replaced by the given value.
print("\nFilling missing values with a constant using .fillna(0):")
print(scores.fillna(0))

# mean() skips missing values by default, so the fill value is the mean
# of the three scores that are present.
print("\nFilling missing values with the mean:")
print(scores.fillna(scores.mean()))
