## exercise 13. Date and File Handling

##### exercise 1 Date and Time

##### Get Current Day, Month, Year, Hour, Minute, and Timestamp

In [30]:
from datetime import datetime

# Current date and time
now = datetime.now()

# Extract details
current_day = now.day
current_month = now.month
current_year = now.year
current_hour = now.hour
current_minute = now.minute
current_second = now.second
current_timestamp = now.timestamp()

# Output
print(f"Day: {current_day}, Month: {current_month}, Year: {current_year}")
print(f"Hour: {current_hour}, Minute: {current_minute}, Second: {current_second}")
print(f"Timestamp: {current_timestamp}")


Day: 21, Month: 1, Year: 2025
Hour: 15, Minute: 17, Second: 28
Timestamp: 1737452848.966817


#### Format the Current Date

In [31]:
# Format the date
formatted_date = now.strftime("%m/%d/%Y, %H:%M:%S")

# Output
print("Formatted Date:", formatted_date)


Formatted Date: 01/21/2025, 15:17:28


#### Convert String "21 January, 2025" to a Time Object

In [32]:
# Given time string
time_string = "21 January, 2025"

# Convert to datetime object
converted_time = datetime.strptime(time_string, "%d %B, %Y")

# Output
print("Converted Time:", converted_time)


Converted Time: 2025-01-21 00:00:00


#### Calculate the Time Difference Between Now and New Year

In [33]:
# Define the next New Year's date
new_year = datetime(current_year + 1, 1, 1)

# Calculate the difference
time_difference = new_year - now

# Output
print("Time until New Year:", time_difference)


Time until New Year: 344 days, 8:42:31.033183


#### Calculate the Time Difference Between 1 January 1970 and Now

In [34]:
# Define the epoch time (1 January 1970)
epoch_time = datetime(1970, 1, 1)

# Calculate the difference
time_since_epoch = now - epoch_time

# Output
print("Time since 1 January 1970:", time_since_epoch)
print("Time in seconds:", time_since_epoch.total_seconds())


Time since 1 January 1970: 20109 days, 15:17:28.966817
Time in seconds: 1737472648.966817


**Applications of the datetime Module**
* Examples of Applications:
* Time Series Analysis:  
* 
1. **Analyzing trends over time** (e.g., stock prices, weather patterns).
2. **Timestamp Activities in Applications:**
* Logging user actions (e.g., login, logout).
3. **Scheduling Events**:
4. **Automating reminders**, notifications, or tasks.
5. **Adding Posts to Blogs:**
6. **Displaying the creation time of a blog post.**
7. Date Manipulations:
* Adding/subtracting days, weeks, or months to/from a given date.

## exercise 2

#### File handling 
Find the Ten Most Frequent Words

In [35]:
from collections import Counter
import re

def find_most_frequent_words(text, n=10):
    # Clean the text
    words = re.findall(r'\b\w+\b', text.lower())  # Extract words and convert to lowercase
    word_counts = Counter(words)
    return word_counts.most_common(n)

# Example usage for Obama's, Michelle's, Trump's, and Melania's speeches
def process_speech(file_path):
    with open(file_path, 'r') as file:
        text = file.read()
    return find_most_frequent_words(text)

# Replace paths with the actual file paths
obama_speech = process_speech(r'C:\Users\AL-ANSARY\Documents\Ahmad 30-Days-of-Python\obama_speech.txt')
michelle_speech = process_speech(r'C:\Users\AL-ANSARY\Documents\Ahmad 30-Days-of-Python\michelle_obama_speech.txt')
trump_speech = process_speech(r'C:\Users\AL-ANSARY\Documents\Ahmad 30-Days-of-Python\donald_speech.txt')
melania_speech = process_speech(r'C:\Users\AL-ANSARY\Documents\Ahmad 30-Days-of-Python\melina_trump_speech.txt')

print("Obama:", obama_speech)
print("Michelle:", michelle_speech)
print("Trump:", trump_speech)
print("Melania:", melania_speech)


Obama: [('the', 129), ('and', 113), ('of', 81), ('to', 70), ('our', 67), ('we', 62), ('that', 50), ('a', 48), ('is', 36), ('in', 25)]
Michelle: [('and', 96), ('the', 85), ('to', 84), ('that', 50), ('of', 46), ('â', 42), ('a', 41), ('he', 37), ('in', 36), ('my', 28)]
Trump: [('the', 65), ('and', 59), ('we', 43), ('will', 40), ('of', 38), ('to', 32), ('our', 30), ('is', 20), ('america', 16), ('â', 14)]
Melania: [('and', 77), ('to', 55), ('the', 52), ('is', 29), ('i', 28), ('for', 27), ('of', 25), ('that', 24), ('a', 22), ('you', 21)]


#### Check Similarity Between Two Texts

In [36]:
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

def clean_text(text):
    return re.sub(r'[^\w\s]', '', text.lower())

def remove_support_words(text, stop_words_path):
    with open(stop_words_path, 'r') as file:
        stop_words = file.read().split()
    words = text.split()
    filtered_words = [word for word in words if word not in stop_words]
    return ' '.join(filtered_words)

def check_text_similarity(text1, text2, stop_words_path):
    text1_cleaned = clean_text(text1)
    text2_cleaned = clean_text(text2)
    
    text1_filtered = remove_support_words(text1_cleaned, stop_words_path)
    text2_filtered = remove_support_words(text2_cleaned, stop_words_path)
    
    vectorizer = CountVectorizer().fit_transform([text1_filtered, text2_filtered])
    vectors = vectorizer.toarray()
    
    similarity = cosine_similarity(vectors)
    return similarity[0, 1]

# Example usage for Michelle's and Melania's speeches
with open(r'C:\Users\AL-ANSARY\Documents\Ahmad 30-Days-of-Python\michelle_obama_speech.txt', 'r') as michelle_file:
    michelle_text = michelle_file.read()

with open(r'C:\Users\AL-ANSARY\Documents\Ahmad 30-Days-of-Python\melina_trump_speech.txt', 'r') as melania_file:
    melania_text = melania_file.read()

similarity = check_text_similarity(michelle_text, melania_text, r'C:\Users\AL-ANSARY\Documents\Ahmad 30-Days-of-Python\stop_words.txt')
print("Similarity between Michelle's and Melania's speeches:", similarity)


Similarity between Michelle's and Melania's speeches: 0.9003770944829639


#### Analyze csv file, Count Specific Lines

In [37]:
import csv

def count_lines(file_path, keywords):
    counts = {key: 0 for key in keywords}
    
    with open(file_path, 'r') as file:
        reader = csv.reader(file)
        for row in reader:
            line = ' '.join(row).lower()
            for key in keywords:
                if key.lower() in line:
                    counts[key] += 1
    return counts

def count_java_exclusive(file_path):
    count = 0
    with open(file_path, 'r') as file:
        reader = csv.reader(file)
        for row in reader:
            line = ' '.join(row).lower()
            if 'java' in line and 'javascript' not in line:
                count += 1
    return count

# Example usage
hacker_news_path = r'C:\\Users\\AL-ANSARY\\Documents\\Ahmad 30-Days-of-Python\\hacker_news.csv'
keywords = ['python', 'javascript', 'java']
line_counts = count_lines(hacker_news_path, keywords)
java_exclusive_count = count_java_exclusive(hacker_news_path)

print("Line counts for keywords:", line_counts)
print("Lines with Java but not JavaScript:", java_exclusive_count)


Line counts for keywords: {'python': 179, 'javascript': 184, 'java': 251}
Lines with Java but not JavaScript: 67
