### ***Note Before Proceeding***
1. Download the "annotations" folder in the GitHub Repository
2. Make sure to update the "annotations" variable to the specific path where you downloaded the "Annotations"

### **Question 1. How many files the annotations folder has?**

In [48]:
import os
import glob

In [49]:
annotations = '/Users/biancabaldonado/Desktop/ESADE/session_4/annotations'
files = [x for x in os.listdir(annotations) if x != '.DS_Store'] #excluding the .DS_Store file, in case this file was autogenerated
print("----Total Number of Files in Annotation Folder----")
print(f'There are {len(files)} files in the folder')

----Total Number of Files in Annotation Folder----
There are 206 files in the folder


### **Question 2. How many of them follow the name convention?**

In [50]:

pattern = r'(\d{8})_(\d{6})_SN(\d+)_QUICKVIEW_VISUAL_([\d_]+)_([A-Za-z0-9\-_.]+)\.txt' 

# Extracting all files in the folder, except .DS_Store
annotations = glob.glob('/Users/biancabaldonado/Desktop/ESADE/session_4/annotations/*.txt')
annotations = [x for x in annotations if os.path.basename(x) != '.DS_Store']

correct_convention = []
incorrect_convention = []

for annotation in annotations:
    # extract the file name
    filename = os.path.basename(annotation)
    
    # Search and extract values
    match = re.match(pattern, filename)
    if match:
        correct_convention.append(filename)

print("----Files with Correct Naming Convention----")
print(f'There are {len(correct_convention)} files that follow the naming convention')


----Files with Correct Naming Convention----
There are 194 files that follow the naming convention


### ***Disclaimer***

All those part of incorrect convention will not be included in the succeeding numbers since they do not follow the naming convention pattern and as such, the numbers in the file are not intuitive and would need further information on how to interpret (only given the corresponding values for those following the naming convention and as such, I have only extracted the data from these)

### **Question 3. How many of annotations you have per month and year? Which month has more annotation files?**

In [51]:
#Disclaimer: Ensure you have a Python version of at least 3.10+ for the match function to work
import re
import glob
import os
from datetime import datetime
from collections import Counter

pattern = r'(\d{8})_(\d{6})_SN(\d+)_QUICKVIEW_VISUAL_([\d_]+)_([A-Za-z0-9\-_.]+)\.txt' 

annotations = glob.glob('/Users/biancabaldonado/Desktop/ESADE/session_4/annotations/*.txt')

ann_datetime = []
total_years = []
total_months = []
total_year_month = []

for annotation in annotations:

    filename = os.path.basename(annotation)
    
    match = re.match(pattern, filename)
    if match:
        date, time, _, _, _ = match.groups()

        datetime_str = date + time 

        datetime_obj = datetime.strptime(datetime_str, "%Y%m%d%H%M%S")

        year = datetime_obj.year
        month = datetime_obj.month
        
        ann_datetime.append((year, month))

total_years = []
total_months = []
total_year_month = []

for year, month in ann_datetime:
    total_years.append(year) 
    total_months.append(month)
    total_year_month.append((year,month))
      
years_count = Counter(total_years)
months_count = Counter(total_months)
yearmonth_count = Counter(total_year_month)

print("----Number of Files per Year----")
for x, y in years_count.items():
    print(f"Year: {x}, Count: {y}")

print("\n----Number of Files per Month----")
for x, y in months_count.items():
    print(f"Month: {x}, Count: {y}") 

print("\n----Number of Files per Year and Month----")
for x, y in yearmonth_count.items():
    print(f"Year & Month: {x}, Count: {y}") 

max_month = max(months_count, key=months_count.get)

def convert_month(month):
    match month:
        case 1:return "January"
        case 2:return "February"
        case 3:return "March"
        case 4:return "April"
        case 5:return "May"
        case 6:return "June"
        case 7:return "July"
        case 8:return "August"
        case 9:return "September"
        case 10:return "October"
        case 11:return "November"
        case 12:return "December"
        case _:return "Error"  

max_month = convert_month(max_month)

print("\n----Month that has the most annotations:----")
print(max_month)


----Number of Files per Year----
Year: 2024, Count: 194

----Number of Files per Month----
Month: 1, Count: 27
Month: 6, Count: 52
Month: 4, Count: 25
Month: 2, Count: 45
Month: 3, Count: 17
Month: 5, Count: 28

----Number of Files per Year and Month----
Year & Month: (2024, 1), Count: 27
Year & Month: (2024, 6), Count: 52
Year & Month: (2024, 4), Count: 25
Year & Month: (2024, 2), Count: 45
Year & Month: (2024, 3), Count: 17
Year & Month: (2024, 5), Count: 28

----Month that has the most annotations:----
June


### **Question 4. Create a new annotations folder with multiple folders corresponding to a month.**

In [52]:
import os
import re
import shutil
from datetime import datetime

pattern = r'(\d{8})_(\d{6})_SN(\d+)_QUICKVIEW_VISUAL_([\d_]+)_([A-Za-z0-9\-_.]+)\.txt'
annotations = '/Users/biancabaldonado/Desktop/ESADE/session_4/annotations'

monthsfolder = ["January", "February", "March", "April", "May", "June", 
                "July", "August", "September", "October", "November", "December"]

for month in monthsfolder:
    os.makedirs(os.path.join(annotations, month), exist_ok=True)

moved_files = 0
ds_store_files = 0
pattern_mismatches = 0
original_paths = {}

for root, dirs, files in os.walk(annotations):
    for filename in files:
        if filename == '.DS_Store':
            ds_store_files += 1
            continue
        
        if filename.endswith('.txt'):
            match = re.match(pattern, filename)
            
            if match:
                date, time, _, _, _ = match.groups()
                datetime_str = date + time
                datetime_obj = datetime.strptime(datetime_str, "%Y%m%d%H%M%S")
                
                month_name = monthsfolder[datetime_obj.month - 1]
                dest_folder = os.path.join(annotations, month_name)
                dest_file_path = os.path.join(dest_folder, filename)

                if not os.path.exists(dest_file_path):
                    src_file_path = os.path.join(root, filename)
                    shutil.move(src_file_path, dest_file_path)
                    original_paths[dest_file_path] = src_file_path
                    moved_files += 1

            else:
                pattern_mismatches += 1

print("----Success: New Annotation Folders per Month Added----")
print(f"\nSummary: \n- {moved_files} files moved to each folder based on the month,")
print(f"- {ds_store_files} '.DS_Store' files skipped,")
print(f"- {pattern_mismatches} files did not match the pattern.")


----Success: New Annotation Folders per Month Added----

Summary: 
- 194 files moved to each folder based on the month,
- 1 '.DS_Store' files skipped,
- 0 files did not match the pattern.


### **Question 5. Print all the annotations from the most recent to the oldest one.** 

In [53]:
import os
import re
from datetime import datetime

pattern = r'(\d{8})_(\d{6})_SN(\d+)_QUICKVIEW_VISUAL_([\d_]+)_([A-Za-z0-9\-_.]+)\.txt'
annotations = '/Users/biancabaldonado/Desktop/ESADE/session_4/annotations'

ann_datetime = []

for root, dirs, files in os.walk(annotations):
    for filename in files:
        if filename.endswith('.txt'):
            file_path = os.path.join(root, filename)
            match = re.match(pattern, filename)
            if match:
                date, time, satellite_number, _, _ = match.groups()
                datetime_str = date + time
                datetime_obj = datetime.strptime(datetime_str, "%Y%m%d%H%M%S")
                ann_datetime.append((file_path, datetime_obj))

recent_to_oldest = sorted(ann_datetime, key=lambda x: x[1], reverse=True)

print("----Files Arranged from Recent to Oldest----")
for index, (file_path, datetime_obj) in enumerate(recent_to_oldest, start=1):
    year = datetime_obj.year
    month = datetime_obj.month
    print(f"{index}. {file_path} --> [Year: {year}, Month: {month}]")


----Files Arranged from Recent to Oldest----
1. /Users/biancabaldonado/Desktop/ESADE/session_4/annotations/June/20240623_215120_SN29_QUICKVIEW_VISUAL_1_7_0_SATL-2KM-10N_596_4134.txt --> [Year: 2024, Month: 6]
2. /Users/biancabaldonado/Desktop/ESADE/session_4/annotations/June/20240623_215102_SN43_QUICKVIEW_VISUAL_1_7_0_SATL-2KM-11N_384_3750.txt --> [Year: 2024, Month: 6]
3. /Users/biancabaldonado/Desktop/ESADE/session_4/annotations/June/20240623_193704_SN27_QUICKVIEW_VISUAL_1_7_0_SATL-2KM-11N_566_3734.txt --> [Year: 2024, Month: 6]
4. /Users/biancabaldonado/Desktop/ESADE/session_4/annotations/June/20240619_215556_SN29_QUICKVIEW_VISUAL_1_7_0_SATL-2KM-10N_742_4460.txt --> [Year: 2024, Month: 6]
5. /Users/biancabaldonado/Desktop/ESADE/session_4/annotations/June/20240619_185757_SN24_QUICKVIEW_VISUAL_1_7_0_SATL-2KM-11N_528_3700.txt --> [Year: 2024, Month: 6]
6. /Users/biancabaldonado/Desktop/ESADE/session_4/annotations/June/20240619_052401_SN30_QUICKVIEW_VISUAL_1_7_0_SATL-2KM-52N_368_4336.tx

### **Question 6. How many different satellites there are, how many annotations we have per satellite number, and which one was used in the most recent annotation file.** 

In [54]:
import os
import re
from collections import Counter
from datetime import datetime

pattern = r'(\d{8})_(\d{6})_SN(\d+)_QUICKVIEW_VISUAL_([\d_]+)_([A-Za-z0-9\-_.]+)\.txt'
annotations = '/Users/biancabaldonado/Desktop/ESADE/session_4/annotations'

monthsfolder = ["January", "February", "March", "April", "May", "June", 
                "July", "August", "September", "October", "November", "December"]

satellites = []
ann_datetime = []

for root, dirs, files in os.walk(annotations):
    for filename in files:
        if filename == '.DS_Store':  
            continue
        if filename.endswith('.txt'):
            match = re.match(pattern, filename)
            if match:
                date, time, satellite_number, _, _ = match.groups()
                datetime_str = date + time
                datetime_obj = datetime.strptime(datetime_str, "%Y%m%d%H%M%S")
                
                ann_datetime.append((filename, datetime_obj, satellite_number))
                satellites.append(satellite_number)

satellites_count = Counter(satellites)

print("----Number of Unique Satellites----")
print(len(satellites_count))

print("\n----List of Unique Satellites----")
for satellite_number in sorted(set(satellites)):  
    print(f"Satellite Number: {satellite_number}")  

print("\n----Number of Files per Satellite----")
for satellite_number, count in sorted(satellites_count.items()):
    print(f"Satellite Number: {satellite_number}, Count: {count}")

pattern2 = r'SN\d+'

recent_to_oldest = sorted(ann_datetime, key=lambda x: x[1], reverse=True)  

most_recent_filename = recent_to_oldest[0][0]
match = re.search(pattern2, most_recent_filename)

print("\n----Satellite Number of Most Recent File----")
if match:
    sn_number = match.group(0)
    print(f"Satellite Number: {sn_number}, File Name: {most_recent_filename}")


----Number of Unique Satellites----
9

----List of Unique Satellites----
Satellite Number: 24
Satellite Number: 26
Satellite Number: 27
Satellite Number: 28
Satellite Number: 29
Satellite Number: 30
Satellite Number: 31
Satellite Number: 33
Satellite Number: 43

----Number of Files per Satellite----
Satellite Number: 24, Count: 26
Satellite Number: 26, Count: 37
Satellite Number: 27, Count: 29
Satellite Number: 28, Count: 16
Satellite Number: 29, Count: 22
Satellite Number: 30, Count: 18
Satellite Number: 31, Count: 19
Satellite Number: 33, Count: 16
Satellite Number: 43, Count: 11

----Satellite Number of Most Recent File----
Satellite Number: SN29, File Name: 20240623_215120_SN29_QUICKVIEW_VISUAL_1_7_0_SATL-2KM-10N_596_4134.txt


### **Question 7. How many unique regions there are.**

In [55]:
import os
import re
from collections import Counter

pattern = r'(\d{8})_(\d{6})_SN(\d+)_QUICKVIEW_VISUAL_([\d_]+)_([A-Za-z0-9\-_.]+)\.txt'

annotations = '/Users/biancabaldonado/Desktop/ESADE/session_4/annotations'

annotations_list = []

for root, dirs, files in os.walk(annotations):
    for filename in files:
        if filename.endswith('.txt'):  
            annotations_list.append(os.path.join(root, filename))

regions = []

for annotation in annotations_list:
    filename = os.path.basename(annotation) 

    match = re.match(pattern, filename) 
    if match:
        _, _, _, _, unique_region = match.groups() 
        regions.append(unique_region) 

regions_count = Counter(regions)

print("\n----Number of Unique Regions:----")
print(len(regions_count))  

print("\n----Value Count per Unique Region:----")
for region, count in sorted(regions_count.items()):
    print(f"Region: {region}, Count: {count}")



----Number of Unique Regions:----
137

----Value Count per Unique Region:----
Region: SATL-2KM-10N_542_4168, Count: 2
Region: SATL-2KM-10N_544_4186, Count: 1
Region: SATL-2KM-10N_546_4206, Count: 1
Region: SATL-2KM-10N_550_4202, Count: 1
Region: SATL-2KM-10N_552_4162, Count: 3
Region: SATL-2KM-10N_552_4164, Count: 2
Region: SATL-2KM-10N_554_4162, Count: 3
Region: SATL-2KM-10N_554_4172, Count: 2
Region: SATL-2KM-10N_556_4176, Count: 1
Region: SATL-2KM-10N_556_4178, Count: 1
Region: SATL-2KM-10N_556_4180, Count: 1
Region: SATL-2KM-10N_558_4184, Count: 1
Region: SATL-2KM-10N_560_4178, Count: 2
Region: SATL-2KM-10N_562_4170, Count: 1
Region: SATL-2KM-10N_562_4178, Count: 1
Region: SATL-2KM-10N_562_4196, Count: 1
Region: SATL-2KM-10N_564_4194, Count: 1
Region: SATL-2KM-10N_568_4176, Count: 1
Region: SATL-2KM-10N_594_4136, Count: 1
Region: SATL-2KM-10N_596_4134, Count: 1
Region: SATL-2KM-10N_602_4148, Count: 1
Region: SATL-2KM-10N_630_4262, Count: 1
Region: SATL-2KM-10N_630_4264, Count: 1
R

### **Undoing Moving of Files to Folder**
- Make sure to run this code at the end so that when the code is ran again, files are moved back to its original location


In [56]:
import os
import shutil

annotations = '/Users/biancabaldonado/Desktop/ESADE/session_4/annotations'
monthsfolder = ["January", "February", "March", "April", "May", "June", 
                "July", "August", "September", "October", "November", "December"]

moved_back_files = 0

# Iterate through each month folder and move files back to main annotations directory
for month in monthsfolder:
    month_folder_path = os.path.join(annotations, month)
    if os.path.exists(month_folder_path):  # Check if the monthly folder exists
        for filename in os.listdir(month_folder_path):
            file_path = os.path.join(month_folder_path, filename)
            if filename.endswith('.txt'):  # Only move .txt files
                dest_path = os.path.join(annotations, filename)
                shutil.move(file_path, dest_path)
                moved_back_files += 1

print(f"----Undo Complete: {moved_back_files} Files Moved Back to Main Annotations Directory----")


----Undo Complete: 194 Files Moved Back to Main Annotations Directory----
