# Exercises

In [6]:
#1. How many annotations you have per month and year. Which month has more annotation files.
import os
import re
from datetime import datetime
import pandas as pd

# Path to the folder containing annotation files
annotations_path = '/Users/victorjansen/Desktop/Esade/Python_for_Data_Science/weeks/week_4/session_4/annotations'

# Dictionary to track annotation counts by month
monthly_annotation_count = {}

# Iterating over all files in the directory
for annotation_file in os.listdir(annotations_path):
    # Regex to extract date pattern from filenames
    file_match = re.match(r'(\d{8}_\d{6}).*\.txt', annotation_file)
    if file_match:
        timestamp = file_match.group(1)  # Extract timestamp string
        file_date = datetime.strptime(timestamp, "%Y%m%d_%H%M%S")  # Convert to datetime object
        
        month_year = file_date.strftime("%Y-%m")  # Extract year-month format
        
        # Update the count for the respective month
        if month_year not in monthly_annotation_count:
            monthly_annotation_count[month_year] = 0
        monthly_annotation_count[month_year] += 1

# Determine the month with the highest annotation count
top_month = max(monthly_annotation_count, key=monthly_annotation_count.get)

# Create a DataFrame for visualization
annotations_df = pd.DataFrame(
    list(monthly_annotation_count.items()), 
    columns=["Month-Year", "Annotation Count"]
)

# Sort the DataFrame by count in descending order
annotations_df = annotations_df.sort_values(by="Annotation Count", ascending=False)

# Print the DataFrame
print("Monthly annotation counts sorted by frequency:")
print(annotations_df)

# Print the month with the most annotations
print("\nMonth with the highest number of annotations:")
print(f"{top_month} with {monthly_annotation_count[top_month]} annotations")


Monthly annotation counts sorted by frequency:
  Month-Year  Annotation Count
1    2024-06                52
3    2024-02                45
2    2024-04                37
5    2024-05                28
0    2024-01                27
4    2024-03                17

Month with the highest number of annotations:
2024-06 with 52 annotations


In [20]:
#2. Create a dictionary where each **key** is a month, and the corresponding **value** is a list containing all the annotation names with where their date corresponds to the month. 
#a. Save it following the json format, and load it again to check that everything is ok.
import json

# Grouping annotations by month
annotations_grouped = {}

for file in os.listdir(annotations_path):
    match = re.match(r'(\d{8}_\d{6}).*\.txt', file)  # Extracting date part from the file name
    if match:
        date_str = match.group(1)
        date = datetime.strptime(date_str, "%Y%m%d_%H%M%S")  # Converting to datetime
        month_key = date.strftime("%Y-%m")  # Extracting month (YYYY-MM)
        
        # Add the annotation name to the corresponding month
        if month_key not in annotations_grouped:
            annotations_grouped[month_key] = []
        annotations_grouped[month_key].append(file)

# Saving the dictionary to a JSON file
json_path = '/Users/victorjansen/Desktop/Esade/Python_for_Data_Science/weeks/week_5/annotations_by_month.json'
with open(json_path, 'w') as json_file:
    json.dump(annotations_grouped, json_file)

# Loading the JSON file to verify
with open(json_path, 'r') as json_file:
    loaded_annotations = json.load(json_file)

# Display results
print(f"JSON saved at: {json_path}")
print("Loaded JSON data:")
print(loaded_annotations)
    

JSON saved at: /Users/victorjansen/Desktop/Esade/Python_for_Data_Science/weeks/week_5/annotations_by_month.json
Loaded JSON data:
{'2024-01': ['20240102_185527_SN27_QUICKVIEW_VISUAL_1_1_10_SATL-2KM-11N_740_3850.txt', '20240101_174301_SN33_QUICKVIEW_VISUAL_1_1_10_SATL-2KM-11N_404_3770.txt', '20240101_192856_SN24_QUICKVIEW_VISUAL_1_1_10_SATL-2KM-10N_552_4164.txt', '20240102_185954_SN24_QUICKVIEW_VISUAL_1_1_10_SATL-2KM-11N_414_3786.txt', '20240104_220339_SN31_QUICKVIEW_VISUAL_1_1_10_SATL-2KM-10N_556_4178.txt', '20240115_213834_SN28_QUICKVIEW_VISUAL_1_1_10_SATL-2KM-11N_376_3722.txt', '20240126_173752_SN33_QUICKVIEW_VISUAL_1_1_10_SATL-2KM-11N_386_3722.txt', '20240101_174301_SN33_QUICKVIEW_VISUAL_1_1_10_SATL-2KM-11N_404_3772.txt', '20240130_173903_SN33_QUICKVIEW_VISUAL_1_1_10_SATL-2KM-11N_366_3756.txt', '20240127_190620_SN27_QUICKVIEW_VISUAL_1_1_10_SATL-2KM-11N_500_3600.txt', '20240101_192856_SN24_QUICKVIEW_VISUAL_1_1_10_SATL-2KM-10N_552_4162.txt', '20240127_190620_SN27_QUICKVIEW_VISUAL_1_1_

In [31]:
#b. Save it this time using Pickle.
import pickle

# Define path for Pickle file
pickle_file_path = '/Users/victorjansen/Desktop/Esade/Python_for_Data_Science/weeks/week_5/annotations_by_month.pkl'

# Save the dictionary using Pickle
with open(pickle_file_path, 'wb') as file:
    pickle.dump(annotations_grouped, file)

# Load the Pickle file to verify its contents
with open(pickle_file_path, 'rb') as file:
    verified_annotations = pickle.load(file)

# Output results to confirm everything is correct
print(f"Pickle file stored at: {pickle_file_path}")
print("Verified data from Pickle file:")
print(verified_annotations)

    

Pickle file stored at: /Users/victorjansen/Desktop/Esade/Python_for_Data_Science/weeks/week_5/annotations_by_month.pkl
Verified data from Pickle file:
{'2024-01': ['20240102_185527_SN27_QUICKVIEW_VISUAL_1_1_10_SATL-2KM-11N_740_3850.txt', '20240101_174301_SN33_QUICKVIEW_VISUAL_1_1_10_SATL-2KM-11N_404_3770.txt', '20240101_192856_SN24_QUICKVIEW_VISUAL_1_1_10_SATL-2KM-10N_552_4164.txt', '20240102_185954_SN24_QUICKVIEW_VISUAL_1_1_10_SATL-2KM-11N_414_3786.txt', '20240104_220339_SN31_QUICKVIEW_VISUAL_1_1_10_SATL-2KM-10N_556_4178.txt', '20240115_213834_SN28_QUICKVIEW_VISUAL_1_1_10_SATL-2KM-11N_376_3722.txt', '20240126_173752_SN33_QUICKVIEW_VISUAL_1_1_10_SATL-2KM-11N_386_3722.txt', '20240101_174301_SN33_QUICKVIEW_VISUAL_1_1_10_SATL-2KM-11N_404_3772.txt', '20240130_173903_SN33_QUICKVIEW_VISUAL_1_1_10_SATL-2KM-11N_366_3756.txt', '20240127_190620_SN27_QUICKVIEW_VISUAL_1_1_10_SATL-2KM-11N_500_3600.txt', '20240101_192856_SN24_QUICKVIEW_VISUAL_1_1_10_SATL-2KM-10N_552_4162.txt', '20240127_190620_SN27_

In [17]:
#c. Instead of storing a list of all the annotation names happening that month, let's create for each annotation a dictionary with keys: name and date (using a datetime object).

# Grouping annotations by month, with each annotation as a dictionary
annotations_grouped = {}

for file in os.listdir(annotations_path):
    match = re.match(r'(\d{8}_\d{6}).*\.txt', file)  # Extracting date part from the file name
    if match:
        date_str = match.group(1)
        date = datetime.strptime(date_str, "%Y%m%d_%H%M%S")  # Converting to datetime
        month_key = date.strftime("%Y-%m")  # Extracting month (YYYY-MM)
        
        # Creating annotation dictionary with name and date
        annotation_details = {"name": file, "date": date}
        
        # Add the annotation dictionary to the corresponding month
        if month_key not in annotations_grouped:
            annotations_grouped[month_key] = []
        annotations_grouped[month_key].append(annotation_details)

# Save the updated dictionary to a Pickle file
pickle_file_path = '/Users/victorjansen/Desktop/Esade/Python_for_Data_Science/weeks/week_5/annotations_by_month.pkl'
with open(pickle_file_path, 'wb') as file:
    pickle.dump(annotations_grouped, file)

# Load the Pickle file to verify
with open(pickle_file_path, 'rb') as file:
    verified_annotations = pickle.load(file)

# Output results to confirm everything is correct
print(f"Pickle file stored at: {pickle_file_path}")
print("Verified data from Pickle file:")
print(verified_annotations)

Pickle file stored at: /Users/victorjansen/Desktop/Esade/Python_for_Data_Science/weeks/week_5/annotations_by_month.pkl
Verified data from Pickle file:
{'2024-01': [{'name': '20240102_185527_SN27_QUICKVIEW_VISUAL_1_1_10_SATL-2KM-11N_740_3850.txt', 'date': datetime.datetime(2024, 1, 2, 18, 55, 27)}, {'name': '20240101_174301_SN33_QUICKVIEW_VISUAL_1_1_10_SATL-2KM-11N_404_3770.txt', 'date': datetime.datetime(2024, 1, 1, 17, 43, 1)}, {'name': '20240101_192856_SN24_QUICKVIEW_VISUAL_1_1_10_SATL-2KM-10N_552_4164.txt', 'date': datetime.datetime(2024, 1, 1, 19, 28, 56)}, {'name': '20240102_185954_SN24_QUICKVIEW_VISUAL_1_1_10_SATL-2KM-11N_414_3786.txt', 'date': datetime.datetime(2024, 1, 2, 18, 59, 54)}, {'name': '20240104_220339_SN31_QUICKVIEW_VISUAL_1_1_10_SATL-2KM-10N_556_4178.txt', 'date': datetime.datetime(2024, 1, 4, 22, 3, 39)}, {'name': '20240115_213834_SN28_QUICKVIEW_VISUAL_1_1_10_SATL-2KM-11N_376_3722.txt', 'date': datetime.datetime(2024, 1, 15, 21, 38, 34)}, {'name': '20240126_173752_S

In [40]:
#3. Print all the annotations from the oldest ones to the newest one during the seconf half of the 2024.
# Filter and sort annotations from the second half of 2024
second_half_annotations = []

for month, annotations in annotations_grouped.items():
    # Check if the month is in the second half of 2024 (July to December)
    if "2024-07" <= month <= "2024-12":
        for annotation in annotations:
            if isinstance(annotation, dict) and 'date' in annotation:
                second_half_annotations.append(annotation)  # Add valid annotations

# Sort annotations by date
second_half_annotations.sort(key=lambda annotation: annotation['date'])

# Print sorted annotations
print("Annotations from the second half of 2024 (oldest to newest):")
for annotation in second_half_annotations:
    print(f"Name: {annotation['name']}, Date: {annotation['date']}")

#add a print statement if there are no annotations in the second half of 2024
if not second_half_annotations:
    print("  - No annotations found in the second half of 2024. -")



Annotations from the second half of 2024 (oldest to newest):
  - No annotations found in the second half of 2024. -
