In [3]:
import pandas as pd
import random
from datetime import datetime, timedelta

# Generate sample sales data.
# Seed the RNG so a fresh-kernel re-run reproduces the same products, amounts
# and day offsets instead of a different dataset every time.
SEED = 42
random.seed(SEED)

num_records = 100
products = ["Laptop", "Phone", "Tablet", "Monitor"]

# Read the clock once: every record then shares exactly the same time of day,
# rather than each row calling datetime.now() separately.
generated_at = datetime.now()

data = {
    "sale_id": range(1, num_records + 1),
    "product": [random.choice(products) for _ in range(num_records)],
    "amount": [round(random.uniform(100, 2000), 2) for _ in range(num_records)],
    # Each sale is back-dated by 0-5 whole days from the generation time.
    "timestamp": [
        (generated_at - timedelta(days=random.randint(0, 5))).strftime("%Y-%m-%d %H:%M:%S")
        for _ in range(num_records)
    ],
}
df = pd.DataFrame(data)

# Persist without the index so the CSV holds only the four data columns.
df.to_csv("custom_data.csv", index=False)


In [4]:
import pandas as pd

# Load the full dataset from the CSV in the working directory.
df = pd.read_csv("custom_data.csv")

# Show stats
print("Full Dataset Info:")
# DataFrame.info() writes its report to stdout itself and returns None, so it
# must not be wrapped in print() -- that would emit a stray "None" line.
df.info()
print(df.head())

print(f"\nExtracted {len(df)} rows fully.")


Full Dataset Info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100 entries, 0 to 99
Data columns (total 4 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   sale_id    100 non-null    int64  
 1   product    100 non-null    object 
 2   amount     100 non-null    float64
 3   timestamp  100 non-null    object 
dtypes: float64(1), int64(1), object(2)
memory usage: 3.3+ KB
None
   sale_id  product   amount            timestamp
0        1    Phone   613.47  2025-07-03 22:00:59
1        2  Monitor  1378.06  2025-07-01 22:00:59
2        3  Monitor  1507.04  2025-07-01 22:00:59
3        4  Monitor  1593.32  2025-07-05 22:00:59
4        5   Laptop  1057.73  2025-07-05 22:00:59

Extracted 100 rows fully.


In [5]:
from datetime import datetime

# Read the watermark (timestamp of the newest row seen) stored in
# last_extraction.txt, if that file exists.
try:
    with open("last_extraction.txt", "r") as f:
        raw_watermark = f.read().strip()
    # Parse after the file is closed; an empty or malformed file raises ValueError.
    last_time = datetime.strptime(raw_watermark, "%Y-%m-%d %H:%M:%S")
except (FileNotFoundError, ValueError):
    # No usable watermark -> treat as "no previous extraction" and take everything.
    # Only these two cases are handled; other errors (e.g. permissions,
    # KeyboardInterrupt) propagate instead of silently forcing a full re-extract.
    last_time = datetime.min

# Convert column to datetime so it can be compared against the watermark.
df["timestamp"] = pd.to_datetime(df["timestamp"])

# Keep only rows strictly newer than the watermark.
new_data = df[df["timestamp"] > last_time]
print(f"\nExtracted {len(new_data)} rows incrementally since last check.")
print(new_data)



Extracted 100 rows incrementally since last check.
    sale_id  product   amount           timestamp
0         1    Phone   613.47 2025-07-03 22:00:59
1         2  Monitor  1378.06 2025-07-01 22:00:59
2         3  Monitor  1507.04 2025-07-01 22:00:59
3         4  Monitor  1593.32 2025-07-05 22:00:59
4         5   Laptop  1057.73 2025-07-05 22:00:59
..      ...      ...      ...                 ...
95       96   Tablet  1465.22 2025-07-06 22:00:59
96       97    Phone  1393.42 2025-07-01 22:00:59
97       98    Phone   995.36 2025-07-01 22:00:59
98       99    Phone   116.87 2025-07-06 22:00:59
99      100   Laptop   330.55 2025-07-02 22:00:59

[100 rows x 4 columns]


In [6]:
# Persist the watermark only when this run actually picked up rows, so an
# empty extraction leaves the previously stored timestamp untouched.
has_new_rows = not new_data.empty
if has_new_rows:
    latest_seen = new_data["timestamp"].max()
    new_latest_time = latest_seen.strftime("%Y-%m-%d %H:%M:%S")
    with open("last_extraction.txt", "w") as marker_file:
        marker_file.write(new_latest_time)
    print(f"\nUpdated last extraction timestamp to {new_latest_time}.")



Updated last extraction timestamp to 2025-07-06 22:00:59.
