In [3]:
import pandas as pd

### __Creating a Series from a list__

In [4]:
# Build a Series from a plain Python list; the printed result below shows
# the default integer index 0..4 that pandas attaches.
data = list(range(1, 6))
series = pd.Series(data)

In [5]:
print(series)

0    1
1    2
2    3
3    4
4    5
dtype: int64


### __Creating a Series with a specified index__

In [6]:
# Same values as before, but labelled 'a'..'e' instead of the default integers.
index = list('abcde')
series_with_index = pd.Series(data=data, index=index)

In [7]:
print(series_with_index)

a    1
b    2
c    3
d    4
e    5
dtype: int64


### __Creating a Series from a dictionary__

In [8]:
# Build a Series from a dictionary: the keys become the index labels and the
# dictionary values become the data.
data_dict = dict(zip('abcde', range(1, 6)))
series_from_dict = pd.Series(data_dict)

In [9]:
print(series_from_dict)

a    1
b    2
c    3
d    4
e    5
dtype: int64


### __Accessing data in a Series__

In [7]:
print(series_with_index)

a    1
b    2
c    3
d    4
e    5
dtype: int64


In [5]:
print(series)

0    1
1    2
2    3
3    4
4    5
dtype: int64


In [24]:
# Look up single elements by their index label
print(series.loc[2])  # label 2 of the default integer index
print(series_with_index.loc['b'])  # label 'b' of the custom index

3
2


## __`Basic Information in Pandas Series`__

In [5]:
print(series)

0    1
1    2
2    3
3    4
4    5
dtype: int64


### __Return the first rows__

In [25]:
# Take the first three elements (index-value pairs) of the Series
first_n_rows = series.head(n=3)

⬆️ __head(3) returns the first 3 index–value pairs, not “rows” in a table sense.__

“Give me the first 3 elements of this Series.”

In [9]:
print(first_n_rows)

0    1
1    2
2    3
dtype: int64


---

### __Return the last rows__

In [5]:
print(series)

0    1
1    2
2    3
3    4
4    5
dtype: int64


In [27]:
# Take the last three elements (index-value pairs) of the Series
last_n_rows = series.tail(n=3)

In [7]:
print(last_n_rows)

2    3
3    4
4    5
dtype: int64


### __Return the dimensions__

In [5]:
print(series)

0    1
1    2
2    3
3    4
4    5
dtype: int64


In [28]:
# Return the shape. A Series is one-dimensional, so the result is a
# one-element tuple (number_of_elements,) -- there is no "columns" entry.
dimensions = series.shape

In [29]:
print(dimensions)

(5,)


### __Generate Descriptive Statistics__

In [30]:
# Generate descriptive statistics: the result is itself a Series holding
# count, mean, std, min, the 25%/50%/75% quartiles and max (printed below).
stats = series.describe()

In [31]:
print(stats)

count    5.000000
mean     3.000000
std      1.581139
min      1.000000
25%      2.000000
50%      3.000000
75%      4.000000
max      5.000000
dtype: float64


| Statistic | Name               | What it tells you      | Plain-language meaning                                            |
| --------- | ------------------ | ---------------------- | ----------------------------------------------------------------- |
| `count`   | Count              | Number of valid values | How many data points you actually have (excluding missing values) |
| `mean`    | Mean (Average)     | Central tendency       | The average value                                                 |
| `std`     | Standard Deviation | Spread / variability   | How far values typically are from the mean                        |
| `min`     | Minimum            | Lower bound            | The smallest value in the dataset                                 |
| `25%`     | 1st Quartile (Q1)  | Lower spread           | 25% of values are at or below this number                         |
| `50%`     | Median             | Middle value           | Half the values are below, half above                             |
| `75%`     | 3rd Quartile (Q3)  | Upper spread           | 75% of values are at or below this number                         |
| `max`     | Maximum            | Upper bound            | The largest value in the dataset                                  |


### __Return Unique Values__

In [32]:
# Return the distinct values of the Series; the printed result below is a
# bare sequence of values with no index attached.
unique_values = series.unique()

In [33]:
print(unique_values)

[1 2 3 4 5]


### __Return the Number of unique Values__

In [34]:
# Count how many distinct values the Series holds (a single integer)
num_unique_values = series.nunique()

In [35]:
print(num_unique_values)

5


## Operations and Transformations in Pandas Series

In [41]:
# Add the two Series. Values are paired by index label, not by position,
# so labels present in only one operand yield NaN.
result_series = series.add(series_with_index)

__Pandas does not add by position. It adds by index label.__

There are no matching index labels between the two Series:

* First series indices: 0, 1, 2, 3, 4

* Second series indices: a, b, c, d, e

In [37]:
print(series)

0    1
1    2
2    3
3    4
4    5
dtype: int64


In [38]:
print(series_with_index)

a    1
b    2
c    3
d    4
e    5
dtype: int64


__First column__ (left) → the index (0–4, a–e)

__Second column__ (right) → the result of addition

* Pandas tried to add values with the same index

* No index label existed in both Series, so there was nothing to add

In [39]:
print(result_series)

0   NaN
1   NaN
2   NaN
3   NaN
4   NaN
a   NaN
b   NaN
c   NaN
d   NaN
e   NaN
dtype: float64


It aligned by index labels, and since the indexes didn’t match, the result is NaN

Pandas looks for each index label in both Series:

`0` exists only in the first Series → no match → NaN

`a` exists only in the second Series → no match → NaN

...


Since none of the labels overlap, every result is NaN.

### __Apply a function to each element__

In [42]:
# Run a function on every value of the Series; here each value is squared
squared_series = series.apply(lambda value: value ** 2)

In [43]:
print(squared_series)

0     1
1     4
2     9
3    16
4    25
dtype: int64


### __Map values using a dictionary__

In [46]:
# Translate values through a lookup table; as the output below shows,
# values without an entry in the table (4 and 5) come back as NaN.
number_names = {1: 'one', 2: 'two', 3: 'three'}
mapped_series = series.map(number_names)

__Mapping values means replacing values in a Series according to a lookup rule.__

In [47]:
print(mapped_series)

0      one
1      two
2    three
3      NaN
4      NaN
dtype: object


Pandas goes through the Series value by value and:

* looks up each value in the dictionary

* replaces it if there’s a match

* returns NaN if there’s no match

### __Sort the series by values__

In [13]:
print(series)

0    1
1    2
2    3
3    4
4    5
dtype: int64


In [48]:
# Order the Series by its values, smallest first; each value keeps its label
sorted_series = series.sort_values(ascending=True)

__`sort_values()` sorts a pandas Series by its values, not by its index.__

* Reorders the Series from smallest to largest value by default

* Keeps the original index attached to each value

In [15]:
# Stand-alone illustration: sorting reorders the values, and every value
# keeps the label it started with (30 stays attached to 'a', and so on).
# The example uses its own variable so the notebook-wide `series` (values
# 1..5) is not overwritten for the isnull()/fillna() cells that follow.
# pandas is already imported in the first cell, so it is not imported again.
unsorted_example = pd.Series([30, 10, 20], index=['a', 'b', 'c'])
unsorted_example.sort_values()


b    10
c    20
a    30
dtype: int64

In [49]:
print(sorted_series)

0    1
1    2
2    3
3    4
4    5
dtype: int64


In [50]:
# Flag missing entries: one boolean per element, True where the value is NaN
missing_values = series.isna()

__It checks each element in the Series and tells you whether it is missing.__

* Returns a boolean Series

* True → value is missing (NaN)

* False → value is not missing

In [51]:
print(missing_values)

0    False
1    False
2    False
3    False
4    False
dtype: bool


In [54]:
# Produce a copy in which every missing value is replaced by 0
filled_series = series.fillna(value=0)

__It replaces missing values (NaN) in the Series with 0 and returns a new Series.__

* Original series → unchanged

* filled_series → no missing values (any NaN would now be 0; this Series had none, so the output below is identical to the original)

In [53]:
print(filled_series)

0    1
1    2
2    3
3    4
4    5
dtype: int64


## Querying a Series
Selecting and filtering data based on specific conditions 

In [16]:
# Series used by the query examples: labels 'a'..'e' mapped to 10, 20, ..., 50
data = dict(zip('abcde', range(10, 60, 10)))
series = pd.Series(data)


In [17]:
print(series)

a    10
b    20
c    30
d    40
e    50
dtype: int64


In [18]:
# Keep only the elements whose value is strictly greater than 30
selected_greater_than_30 = series.loc[series.gt(30)]

In [57]:
print(selected_greater_than_30)

d    40
e    50
dtype: int64


In [58]:
# Keep only the elements whose value is exactly 20
selected_equal_to_20 = series.loc[series.eq(20)]

In [59]:
print(selected_equal_to_20)

b    20
dtype: int64


In [60]:
# Keep every element except those whose value is 40
selected_not_equal_to_40 = series.loc[series.ne(40)]

In [61]:
print(selected_not_equal_to_40)

a    10
b    20
c    30
e    50
dtype: int64


In [62]:
# Keep elements strictly between 20 and 50 (both end points excluded)
selected_multiple_conditions = series[series.between(20, 50, inclusive="neither")]

In [63]:
print(selected_multiple_conditions)

c    30
d    40
dtype: int64


In [64]:
# Keep elements whose value appears in a list of wanted values;
# wanted values that never occur in the Series (60 here) are simply ignored.
wanted_values = [20, 40, 60]
selected_by_list = series.loc[series.isin(wanted_values)]

In [65]:
print(selected_by_list)

b    20
d    40
dtype: int64


In [66]:
# Filter a Series of strings with the .str accessor: build a boolean mask
# of the values that start with 'b', then index the Series with that mask.
fruit_names = ['apple', 'banana', 'cherry', 'date', 'elderberry']
string_series = pd.Series(fruit_names)
starts_with_b = string_series.str.startswith('b')
selected_by_string_method = string_series[starts_with_b]

__This created a Series of strings:__

0    apple

1    banana

2    cherry

3    date

4    elderberry


* .str lets you apply string methods to every value in the Series at once

* Similar to calling .startswith() on each string, but done efficiently
  
  __`startswith()` is a built-in Python string method.__

In [None]:
# Show the boolean mask that the filter above is built from: one entry per
# string, True only where the value starts with 'b'.
# Expected values, in index order 0..4: False, True, False, False, False
string_series.str.startswith('b')


In [67]:
print(selected_by_string_method)

1    banana
dtype: object


In [68]:
# Query based on index labels: passing a list of labels to .loc returns a
# Series containing those labels, in the order they were requested.
selected_by_index_labels = series.loc[['a', 'c', 'e']]

In [69]:
print(selected_by_index_labels)

a    10
c    30
e    50
dtype: int64


In [70]:
# Query based on numeric position: the slice 1:4 takes positions 1, 2 and 3
# (the end position is excluded), i.e. labels 'b', 'c' and 'd' here.
selected_by_numeric_position = series.iloc[1:4]

In [71]:
print(selected_by_numeric_position)

b    20
c    30
d    40
dtype: int64


__Sample sales data:__

In [20]:
# One sales figure per day of the week; the two lists line up by position.
sales_data = [
    120, 150, 130, 170,
    160, 180, 140,
]

days_of_week = [
    'Monday', 'Tuesday', 'Wednesday', 'Thursday',
    'Friday', 'Saturday', 'Sunday',
]

1. Create a Pandas Series for sales data

In [21]:
# Sales figures as a Series with the default integer index (0..6)
sales_data_series = pd.Series(sales_data)

In [22]:
print(sales_data_series)

0    120
1    150
2    130
3    170
4    160
5    180
6    140
dtype: int64


In [23]:
# Same sales figures, now labelled with the day names.
# NOTE(review): this rebinds `data`, which earlier cells used for a list and
# then a dict; a dedicated name would avoid stale-state surprises on re-run.
data = pd.Series(sales_data, index=days_of_week)

In [24]:
print(data)

Monday       120
Tuesday      150
Wednesday    130
Thursday     170
Friday       160
Saturday     180
Sunday       140
dtype: int64


2. Access and manipulate sales data
* Access sales data for specific days using index labels
* Calculate total sales for the week
* Identify the day with the highest and lowest sales

In [26]:
# Accessing data in a Series
print(data.iloc[2])  # Element at integer position 2 (the third entry, Wednesday)
print(data['Friday'])  # Element with index label 'Friday'

130
160


3. Basic analysis of sales data
* Calculate the average sales for the week
* Determine the days with sales figures significantly different from the average

In [28]:
data.mean()

np.float64(150.0)

In [29]:
data.std()

np.float64(21.602468994692867)

In [33]:
# Flag the days whose sales lie more than two standard deviations away from
# the weekly mean, on either side.
mean, std = data.mean(), data.std()
deviation_limit = 2 * std

too_high = data.gt(mean + deviation_limit)
too_low = data.lt(mean - deviation_limit)
sales_figures_significantly_different = data[too_high | too_low]

In [34]:
print(sales_figures_significantly_different)

Series([], dtype: int64)
