# Exercise 5

### 1. How many annotations you have per month and year. Which month has more annotation files.

In [1]:
import os
import glob
import re
from datetime import datetime

# Define file paths and pattern
folder_path = '/Users/alexanderlange/Desktop/Github/Week 4/annotations'
all_files = os.listdir(folder_path)
annotations = glob.glob(f'{folder_path}/*.txt')
pattern = r'(\d{8})_(\d{6})_SN(\d+)_QUICKVIEW_VISUAL_([\d_]+)_([A-Za-z0-9\-_.]+)\.txt'

# Initialize annotation counts
annotation_counts = {}

# Process each annotation file
for annotation in annotations:
    filename = os.path.basename(annotation)
    match = re.match(pattern, filename)
    if match:
        date, time, _, _, _ = match.groups()
        datetime_str = date + time
        datetime_obj = datetime.strptime(datetime_str, "%Y%m%d%H%M%S")
        year = datetime_obj.year
        month = datetime_obj.month

        if year not in annotation_counts:
            annotation_counts[year] = {}

        if month not in annotation_counts[year]:
            annotation_counts[year][month] = 0

        annotation_counts[year][month] += 1

# Find the month with the most annotations
max_annotations = 0
max_month = None

for year, months in annotation_counts.items():
    for month, count in months.items():
        if count > max_annotations:
            max_annotations = count
            max_month = (year, month)

# Display the results
print("Annotation Analysis Report ")
print(f"Total files in folder: {len(all_files)}")
print(f"Total annotation files processed: {len(annotations)}")
print("\nAnnotations per year and month:")

for year, months in sorted(annotation_counts.items()):
    for month, count in sorted(months.items()):
        print(f"Year: {year}, Month: {month:02d}, Annotations: {count}")

if max_month:
    print(f"Month with the most annotations: Year {max_month[0]}, Month {max_month[1]:02d} with {max_annotations} annotations")
else:
    print("\nNo annotations were found.")

 Annotation Analysis Report 
Total files in folder: 207
Total annotation files processed: 206

Annotations per year and month:
Year: 2024, Month: 01, Annotations: 27
Year: 2024, Month: 02, Annotations: 45
Year: 2024, Month: 03, Annotations: 17
Year: 2024, Month: 04, Annotations: 25
Year: 2024, Month: 05, Annotations: 28
Year: 2024, Month: 06, Annotations: 52
Month with the most annotations: Year 2024, Month 06 with 52 annotations


### 2. Create a dictionary where each **key** is a month, and the corresponding **value** is a list containing all the annotation names with where their date corresponds to the month.
- Save it following the json format, and load it again to check that everything is ok. 
- Save it this time using Pickle.
- Instead of storing a list of all the annotation names happening that month, let's create for each annotation a dictionary with keys: name and date (using a datetime object).

In [3]:
import os
import glob
import re
import json
import pickle
from datetime import datetime

# Define file paths and pattern
annotations = glob.glob('/Users/alexanderlange/Desktop/Github/Week 4/annotations/*.txt')
pattern = r'(\d{8})_(\d{6})_SN(\d+)_QUICKVIEW_VISUAL_([\d_]+)_([A-Za-z0-9\-_.]+)\.txt'

# Initialize dictionary to group files by month
files_by_month = {}

# Process annotation files
for annotation in annotations:
    filename = os.path.basename(annotation)
    match = re.match(pattern, filename)
    if match:
        date, time, _, _, _ = match.groups()
        datetime_str = date + time
        datetime_obj = datetime.strptime(datetime_str, "%Y%m%d%H%M%S")

        # Group files by month
        if datetime_obj.month not in files_by_month:
            files_by_month[datetime_obj.month] = []
        files_by_month[datetime_obj.month].append(filename)

# Save data to JSON file
json_file = 'annotations_by_month.json'
with open(json_file, 'w') as f:
    json.dump(files_by_month, f, default=str, indent=4)

# Load data from JSON file for verification
with open(json_file, 'r') as f:
    loaded_annotations_json = json.load(f)

# Save data to Pickle file
pickle_file = 'annotations_by_month.pkl'
with open(pickle_file, 'wb') as f:
    pickle.dump(files_by_month, f)

# Display the results
print("Annotation Processing Report ")
print(f"Total annotation files processed: {len(annotations)}")
print(f"Data successfully saved in:")
print(f"  • JSON file: {json_file}")
print(f"  • Pickle file: {pickle_file}")
print("\nLoaded Data from JSON:")
print(json.dumps(loaded_annotations_json, indent=4))

Annotation Processing Report 
Total annotation files processed: 206
Data successfully saved in:
  • JSON file: annotations_by_month.json
  • Pickle file: annotations_by_month.pkl

Loaded Data from JSON:
{
    "1": [
        "20240102_185527_SN27_QUICKVIEW_VISUAL_1_1_10_SATL-2KM-11N_740_3850.txt",
        "20240101_174301_SN33_QUICKVIEW_VISUAL_1_1_10_SATL-2KM-11N_404_3770.txt",
        "20240101_192856_SN24_QUICKVIEW_VISUAL_1_1_10_SATL-2KM-10N_552_4164.txt",
        "20240102_185954_SN24_QUICKVIEW_VISUAL_1_1_10_SATL-2KM-11N_414_3786.txt",
        "20240104_220339_SN31_QUICKVIEW_VISUAL_1_1_10_SATL-2KM-10N_556_4178.txt",
        "20240115_213834_SN28_QUICKVIEW_VISUAL_1_1_10_SATL-2KM-11N_376_3722.txt",
        "20240126_173752_SN33_QUICKVIEW_VISUAL_1_1_10_SATL-2KM-11N_386_3722.txt",
        "20240101_174301_SN33_QUICKVIEW_VISUAL_1_1_10_SATL-2KM-11N_404_3772.txt",
        "20240130_173903_SN33_QUICKVIEW_VISUAL_1_1_10_SATL-2KM-11N_366_3756.txt",
        "20240127_190620_SN27_QUICKVIEW_VISUAL

### 3. Print all the annotations from the oldest ones to the newest one during the seconf half of the 2024. 

In [4]:
import os
import glob
import re
from datetime import datetime

# Define file paths and pattern
folder_path = '/Users/alexanderlange/Desktop/Github/Week 4/annotations'
all_files = os.listdir(folder_path)
annotations = glob.glob(f'{folder_path}/*.txt')
pattern = r'(\d{8})_(\d{6})_SN(\d+)_QUICKVIEW_VISUAL_([\d_]+)_([A-Za-z0-9\-_.]+)\.txt'

# Initialize list to store matching annotations
ann_datetime = []

# Process annotation files
for annotation in annotations:
    filename = os.path.basename(annotation)
    match = re.match(pattern, filename)
    if match:
        date, time, _, _, _ = match.groups()
        datetime_str = date + time
        datetime_obj = datetime.strptime(datetime_str, "%Y%m%d%H%M%S")

        # Check if the file is from the second half of 2024
        if datetime_obj.year == 2024 and 7 <= datetime_obj.month <= 12:
            ann_datetime.append((filename, datetime_obj))

# Display the results
print("Annotations from the Second Half of 2024 ")
if ann_datetime:
    for filename, datetime_obj in ann_datetime:
        print(f"File: {filename} | Datetime: {datetime_obj}")
else:
    print("No matching annotations found for the specified period.")

Annotations from the Second Half of 2024 
No matching annotations found for the specified period.
