In [1]:
# Husayn El Sharif
# Short statement of the notebook's purpose, kept in a module-level string.
# NOTE(review): `comment` is not referenced again in the visible cells.
comment  ="""
Commits files to a git repository
"""

In [2]:
# Cell 1: imports
import os
from pathlib import Path
import csv
from datetime import datetime
import getpass

try:
    import pathspec
except ImportError:
    raise ImportError(
        "Please install pathspec first: pip install pathspec\n"
        "Then re-run this cell."
    )


In [3]:
# User-editable settings: README title/description and the GitHub remote target.
GITHUB_USERNAME = "helsharif"
REPO_REPO_NAME = "LSTM-and-Georgia-Crop-Forecasting"

project_title = "LSTM-and-Georgia-Crop-Forecasting"
project_description = (
    "A LSTM deep learning model that can imitate crop yield and irrigation "
    "demand modeling/prediction of DSSAT based on bi-weekly climate data of "
    "TMAX, TMIN, SRAD, RAIN during the growing season (16 two-week periods)."
)

In [4]:
# Cell 2: helper to load .gitignore patterns

def load_gitignore(gitignore_path: Path):
    """
    Compile the patterns of a .gitignore file into a pathspec matcher.

    Blank lines and lines whose first non-whitespace character is '#' are
    discarded before compilation.

    Returns the compiled pathspec.PathSpec ("gitwildmatch" syntax), or None
    when the file is missing or contains no usable patterns.
    """
    if not gitignore_path.exists():
        print(f".gitignore not found at: {gitignore_path} (proceeding without it)")
        return None

    raw_lines = gitignore_path.read_text(encoding="utf-8").splitlines()

    # Keep each surviving line exactly as written; only the test uses the
    # stripped form.
    active_patterns = []
    for line in raw_lines:
        stripped = line.strip()
        if stripped and not stripped.startswith("#"):
            active_patterns.append(line)

    if len(active_patterns) == 0:
        print("No non-comment patterns found in .gitignore; effectively unused.")
        return None

    compiled = pathspec.PathSpec.from_lines("gitwildmatch", active_patterns)
    print(f"Loaded {len(active_patterns)} patterns from {gitignore_path}")
    return compiled


In [5]:
# Scan files, respecting .gitignore and exclusions

import fnmatch


def scan_files(base_dir: Path, gitignore_spec=None):
    """
    Recursively inventory the files under base_dir.

    Skipped entries:
      * directories and files whose name starts with "."
      * directories and files matched by gitignore_spec (when given)
      * README.md (any letter case), file_inventory_*.csv,
        *_git_commits.ipynb and .gitignore
      * files carrying the Windows "hidden" attribute
      * files that disappear between listing and stat

    Args:
        base_dir: Directory to scan; resolved to an absolute path first.
        gitignore_spec: Optional object exposing match_file(path) -> bool
            (e.g. a pathspec.PathSpec). Paths are passed POSIX-style and
            relative to base_dir; directories are additionally tried with a
            trailing "/".

    Returns:
        A list of dicts with keys "relative_path", "file_name", "created",
        "last_modified" and "filesize_bytes", sorted by creation date
        (earliest → latest). "created" is the file birth time when the
        platform reports one (st_birthtime); otherwise it falls back to
        st_ctime, which on Linux is the last metadata change rather than the
        creation time.
    """
    records = []
    base_dir = base_dir.resolve()

    # Exact file names (compared lower-cased) that never enter the inventory
    EXCLUDED_FILES = {"readme.md"}

    # Shell-style wildcard patterns that never enter the inventory
    EXCLUDED_PATTERNS = ["file_inventory_*.csv", "*_git_commits.ipynb", ".gitignore"]

    # Win32 "hidden" bit as reported in os.stat_result.st_file_attributes
    FILE_ATTRIBUTE_HIDDEN = 0x2

    for root, dirs, files in os.walk(base_dir):
        root_path = Path(root)

        # Prune in place so os.walk never descends into hidden directories
        dirs[:] = [d for d in dirs if not d.startswith(".")]

        # Apply .gitignore rules to directories
        if gitignore_spec is not None:
            kept_dirs = []
            for d in dirs:
                rel_dir = (root_path / d).relative_to(base_dir).as_posix()
                # Directory-only patterns such as "build/" are only expected
                # to match a candidate that ends with "/", so test both
                # spellings before deciding to descend.
                if gitignore_spec.match_file(rel_dir) or gitignore_spec.match_file(rel_dir + "/"):
                    continue
                kept_dirs.append(d)
            dirs[:] = kept_dirs

        for fname in files:

            # Exclude dotfiles
            if fname.startswith("."):
                continue

            # Skip exact-match exclusions
            if fname.lower() in EXCLUDED_FILES:
                continue

            # Skip wildcard exclusions
            if any(fnmatch.fnmatch(fname, pat) for pat in EXCLUDED_PATTERNS):
                continue

            full_path = root_path / fname
            rel_path = full_path.relative_to(base_dir).as_posix()

            # Skip files ignored by .gitignore
            if gitignore_spec is not None and gitignore_spec.match_file(rel_path):
                continue

            try:
                file_stat = full_path.stat()
            except FileNotFoundError:
                # File vanished (or is a dangling link) since os.walk listed it
                continue

            # Exclude Windows hidden attribute files (attribute absent elsewhere)
            if getattr(file_stat, "st_file_attributes", 0) & FILE_ATTRIBUTE_HIDDEN:
                continue

            # Prefer the true birth time; st_ctime only means "creation" on
            # Windows and is the inode-change time on Linux/WSL.
            created_ts = getattr(file_stat, "st_birthtime", file_stat.st_ctime)
            created_dt = datetime.fromtimestamp(created_ts)
            modified_dt = datetime.fromtimestamp(file_stat.st_mtime)

            records.append({
                "relative_path": rel_path,
                "file_name": fname,
                "created": created_dt.isoformat(sep=" ", timespec="seconds"),
                "last_modified": modified_dt.isoformat(sep=" ", timespec="seconds"),
                "filesize_bytes": file_stat.st_size,
            })

    # Sort by creation date (fixed-width ISO strings order chronologically)
    records.sort(key=lambda r: r["created"])

    return records





In [6]:
# Cell 4: run scan and write CSV

base_dir = Path(".").resolve()
print(f"Scanning base directory: {base_dir}")

# .gitignore location, relative to base_dir (edit here to point at another file)
gitignore_input = ".gitignore"

gitignore_path = base_dir / gitignore_input
gitignore_spec = load_gitignore(gitignore_path)

records = scan_files(base_dir, gitignore_spec)
print(f"Found {len(records)} files (after applying .gitignore).")

# Output CSV: the name carries a timestamp, so each run writes a new file
default_csv_name = f"file_inventory_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv"
csv_path_input = default_csv_name

csv_path = base_dir / csv_path_input

# Column order of the CSV; must match the keys of each record dict
fieldnames = ["relative_path", "file_name", "created", "last_modified", "filesize_bytes"]

# newline="" lets the csv module control line endings itself
with csv_path.open("w", newline="", encoding="utf-8") as f:
    writer = csv.DictWriter(f, fieldnames=fieldnames)
    writer.writeheader()
    writer.writerows(records)

print(f"CSV written to: {csv_path}")


Scanning base directory: /mnt/c/H_Drive/Workstation/07 Data Science Training/2025_001 LSTM and Georgia Crop Modeling
Loaded 73 patterns from /mnt/c/H_Drive/Workstation/07 Data Science Training/2025_001 LSTM and Georgia Crop Modeling/.gitignore
Found 27 files (after applying .gitignore).
CSV written to: /mnt/c/H_Drive/Workstation/07 Data Science Training/2025_001 LSTM and Georgia Crop Modeling/file_inventory_20251211_121919.csv


In [7]:
# README.md generator

from pathlib import Path
from datetime import datetime

# The date span below is derived from the inventory built by the file-scan cell,
# so fail early (with distinct messages) when it is missing or empty.
if 'records' not in globals():
    raise ValueError("The variable 'records' was not found. "
                     "Run the file-scan part of the notebook first.")
if not records:
    raise ValueError("The file scan returned no files, so no project date span "
                     "can be derived for README.md.")

# Convert created/modified fields (ISO strings) back to datetime objects
created_dates = [
    datetime.fromisoformat(r["created"]) for r in records
]
modified_dates = [
    datetime.fromisoformat(r["last_modified"]) for r in records
]

earliest_created = min(created_dates)
latest_modified = max(modified_dates)

# === README Creation ===

# project_title comes from the user-inputs cell; prompt only if it was left empty
while not project_title:
    print("Project title cannot be empty.")
    project_title = input("Enter the Project Title for README.md: ").strip()

# One-sentence statement of the project's date span for the README body
project_period = (
    f"This project was developed over a period spanning "
    f"**{earliest_created.strftime('%Y-%m-%d')}** to "
    f"**{latest_modified.strftime('%Y-%m-%d')}**, "
    f"based on the earliest file creation timestamp "
    f"and the latest file modification timestamp detected in the project directory."
)

# Build README content (trailing double spaces are Markdown hard line breaks)
readme_template = f"""# {project_title}

{project_description}

---

### 📁 Project Overview
This repository contains source code, data, and supporting scripts associated with **{project_title}**.  
{project_period}

The project inventory CSV generated by this notebook documents:
- Relative file paths  
- File names  
- Creation dates (Windows filesystem)  
- Last modified dates  
- (Optional) planned commit metadata  

"""

In [8]:
# Cell 5: Write README.md
# Persist the generated text as README.md in the current working directory.

readme_path = Path("README.md")
readme_path.write_text(readme_template, encoding="utf-8")

# Report where the file landed and the date span it describes.
print(
    f"README.md successfully created at: {readme_path.resolve()}",
    f"Earliest file creation date: {earliest_created}",
    f"Latest file modification date: {latest_modified}",
    sep="\n",
)

README.md successfully created at: /mnt/c/H_Drive/Workstation/07 Data Science Training/2025_001 LSTM and Georgia Crop Modeling/README.md
Earliest file creation date: 2024-05-07 00:30:24
Latest file modification date: 2025-01-14 07:50:48


In [9]:
# Cell 6: git command helper

import subprocess
from pathlib import Path

def run_git(args, cwd=None, allow_prompt=False, timeout=None):
    """
    Run a git command, echo it, and print its stdout/stderr.

    Args:
        args: List of git arguments, e.g. ["status", "-sb"].
        cwd: Directory to run in; defaults to the current working directory.
        allow_prompt: When False (default), GIT_TERMINAL_PROMPT=0 is set in
            the child environment so git does not wait for terminal input.
        timeout: Optional limit in seconds handed to subprocess.run.

    Returns:
        The subprocess.CompletedProcess (text mode, output captured).

    Raises:
        subprocess.TimeoutExpired: If `timeout` elapses before git exits.
    """
    import shlex  # local import keeps this cell self-contained

    cwd = Path(".").resolve() if cwd is None else Path(cwd).resolve()

    env = os.environ.copy()
    if not allow_prompt:
        # Prevent git from prompting for credentials / input
        env["GIT_TERMINAL_PROMPT"] = "0"

    # Quote every argument so the echoed line keeps argument boundaries
    # (e.g. a multi-word commit message stays visibly one argument).
    echoed = " ".join(shlex.quote(str(part)) for part in ["git", *args])
    print(f"$ {echoed}")

    try:
        result = subprocess.run(
            ["git", *args],
            cwd=cwd,
            text=True,
            capture_output=True,
            env=env,
            timeout=timeout,
        )
    except subprocess.TimeoutExpired:
        print("Command timed out. Git may be waiting for credentials or network.")
        raise

    if result.stdout:
        print(result.stdout)
    if result.stderr:
        print(result.stderr)

    return result


In [10]:
# Step 2: Initialize repo if needed

project_root = Path(".").resolve()
git_dir = project_root / ".git"

# A missing .git entry means this directory is not a repository yet.
if not git_dir.exists():
    print(f"No .git directory found at {project_root}, initializing new repo...")
    res = run_git(["init"], cwd=project_root)
    if res.returncode:
        raise RuntimeError("Failed to initialize git repository.")
else:
    print(f"Git repo already initialized at: {project_root}")


No .git directory found at /mnt/c/H_Drive/Workstation/07 Data Science Training/2025_001 LSTM and Georgia Crop Modeling, initializing new repo...
$ git init
Initialized empty Git repository in /mnt/c/H_Drive/Workstation/07 Data Science Training/2025_001 LSTM and Georgia Crop Modeling/.git/

hint: Using 'master' as the name for the initial branch. This default branch name
hint: will change to "main" in Git 3.0. To configure the initial branch name
hint: call:
hint:
hint: 	git config --global init.defaultBranch <name>
hint:
hint: Names commonly chosen instead of 'master' are 'main', 'trunk' and
hint: 'development'. The just-created branch can be renamed via this command:
hint:
hint: 	git branch -m <name>
hint:
hint: Disable this message with "git config set advice.defaultBranchName false"



In [11]:
# Step 3: Set or confirm the remote URL (origin)
# List the configured remotes
res = run_git(["remote", "-v"], cwd=project_root)

# Each output line starts with the remote's name; compare that first field
# exactly so a remote like "origin-backup", or a URL that merely contains the
# word "origin", is not mistaken for origin itself.
has_origin = res.returncode == 0 and any(
    line.split()[0] == "origin"
    for line in res.stdout.splitlines()
    if line.strip()
)

if has_origin:
    print("Remote 'origin' already exists.")
    # Optionally show current URL
    _ = run_git(["remote", "get-url", "origin"], cwd=project_root)
else:
    # Validate the pieces: the assembled URL string itself can never be empty.
    if not GITHUB_USERNAME or not REPO_REPO_NAME:
        raise ValueError(
            "GITHUB_USERNAME and REPO_REPO_NAME must both be set to build the remote URL."
        )
    remote_url = f"https://github.com/{GITHUB_USERNAME}/{REPO_REPO_NAME}.git"
    res = run_git(["remote", "add", "origin", remote_url], cwd=project_root)
    if res.returncode != 0:
        raise RuntimeError("Failed to add remote 'origin'.")


$ git remote -v
$ git remote add origin https://github.com/helsharif/LSTM-and-Georgia-Crop-Forecasting.git


In [12]:
# Step 4: Stage everything (respecting .gitignore)

# Informational only: show the working-tree state before staging.
run_git(["status", "-sb"], cwd=project_root)

print("\nStaging all tracked/trackable files (honoring .gitignore)...")
res = run_git(["add", "."], cwd=project_root)
if res.returncode:
    raise RuntimeError("Failed to run 'git add .'")


$ git status -sb
## No commits yet on master
?? .gitignore
?? "01 Crop Model Results Agg Spreadsheets/"
?? Notes.docx
?? README.md
?? e01_crop_model_lstm_rainfed_corn.ipynb
?? e01_git_commits.ipynb
?? e02_crop_model_lstm_irrigated_corn.ipynb
?? e03_crop_model_lstm_rainfed_cotton.ipynb
?? e04_crop_model_lstm_irrigated_cotton.ipynb
?? e05_crop_model_lstm_rainfed_peanut.ipynb
?? e06_crop_model_lstm_irrigated_peanut.ipynb
?? e07_crop_model_lstm_rainfed_soybean.ipynb
?? e08_crop_model_lstm_irrigated_soybean.ipynb
?? e09_monthly_agg_spreadsheet_generation.ipynb
?? e11_crop_model_lstm_rainfed_corn_monthly.ipynb
?? e12_crop_model_lstm_irrigated_corn_monthly.ipynb
?? e13_crop_model_lstm_rainfed_cotton_monthly.ipynb
?? e14_crop_model_lstm_irrigated_cotton_monthly.ipynb
?? e15_crop_model_lstm_rainfed_peanut_monthly.ipynb
?? e16_crop_model_lstm_irrigated_peanut_monthly.ipynb
?? e17_crop_model_lstm_rainfed_soybean_monthly.ipynb
?? e18_crop_model_lstm_irrigated_soybean_monthly.ipynb
?? models_monthl

In [13]:
# Step 5: Commit with a message built in the notebook

# Default message summarises the date span covered by the scanned files
default_msg = (
    "Initial import of historical work spanning "
    f"{earliest_created:%Y-%m-%d} to {latest_modified:%Y-%m-%d}"
)

# Assign a different string here to override the generated message
commit_msg = default_msg

print(f"Using commit message:\n{commit_msg}\n")

# Perform commit
res = run_git(["commit", "-m", commit_msg], cwd=project_root)

# A non-zero exit is tolerated only when git reports there was nothing to commit
if res.returncode != 0:
    if "nothing to commit" not in (res.stdout + res.stderr).lower():
        raise RuntimeError("Commit failed. See output above.")
    print("Nothing to commit. Working tree is clean.")



Using commit message:
Initial import of historical work spanning 2024-05-07 to 2025-01-14

$ git commit -m Initial import of historical work spanning 2024-05-07 to 2025-01-14
[master (root-commit) 4b59939] Initial import of historical work spanning 2024-05-07 to 2025-01-14
 30 files changed, 34886 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 01 Crop Model Results Agg Spreadsheets/New Crop Model Spreadsheets.zip
 create mode 100644 Notes.docx
 create mode 100644 README.md
 create mode 100644 e01_crop_model_lstm_rainfed_corn.ipynb
 create mode 100644 e01_git_commits.ipynb
 create mode 100644 e02_crop_model_lstm_irrigated_corn.ipynb
 create mode 100644 e03_crop_model_lstm_rainfed_cotton.ipynb
 create mode 100644 e04_crop_model_lstm_irrigated_cotton.ipynb
 create mode 100644 e05_crop_model_lstm_rainfed_peanut.ipynb
 create mode 100644 e06_crop_model_lstm_irrigated_peanut.ipynb
 create mode 100644 e07_crop_model_lstm_rainfed_soybean.ipynb
 create mode 100644 e08_crop_mod

In [14]:
# # Force branch name to 'main' (safe even if already on main)


def run_git(args, cwd=None, allow_prompt=False, timeout=None):
    """
    Run a git command and print stdout/stderr.
    By default, disables interactive prompts so that git fails fast
    instead of hanging waiting for credentials/passphrases.

    Args:
        args: List of git arguments, e.g. ["push", "-u", "origin", "main"].
        cwd: Directory to run in; defaults to the current working directory.
        allow_prompt: When True, GIT_TERMINAL_PROMPT is left as inherited.
        timeout: Optional limit in seconds handed to subprocess.run.

    Returns:
        The subprocess.CompletedProcess (text mode, output captured).

    Raises:
        subprocess.TimeoutExpired: If `timeout` elapses before git exits.
    """
    import shlex  # local import keeps this cell self-contained

    if cwd is None:
        cwd = Path(".").resolve()
    else:
        cwd = Path(cwd).resolve()

    env = os.environ.copy()
    if not allow_prompt:
        # Prevent git from prompting for credentials / input
        env["GIT_TERMINAL_PROMPT"] = "0"

    # Echo once, up front, with each argument shell-quoted so that argument
    # boundaries (e.g. a multi-word commit message) remain visible.
    echoed = " ".join(shlex.quote(str(part)) for part in ["git", *args])
    print(f"$ {echoed}")

    try:
        result = subprocess.run(
            ["git", *args],
            cwd=cwd,
            text=True,
            capture_output=True,
            env=env,
            timeout=timeout  # e.g., timeout=60 if you want
        )
    except subprocess.TimeoutExpired:
        print("Command timed out. Git may be waiting for credentials or network.")
        raise

    if result.stdout:
        print(result.stdout)
    if result.stderr:
        print(result.stderr)

    return result


In [17]:
# Rename the current branch to 'main', then push it and set its upstream
res = run_git(["branch", "-M", "main"], cwd=project_root)
if res.returncode:
    print("Warning: could not rename branch to 'main' (it may already be 'main').")

print("\nPushing to origin main...")
res = run_git(["push", "-u", "origin", "main"], cwd=project_root)

if res.returncode == 0:
    print("Push completed successfully.")
else:
    # List the usual causes before aborting the cell
    print(
        "Push failed. Common reasons:",
        "- Remote repo doesn't exist or URL is wrong",
        "- Authentication/permissions issues (check SSH key or PAT)",
        "- You haven't completed initial SSH/HTTPS auth in a normal terminal yet",
        sep="\n",
    )
    raise RuntimeError("git push failed. Check the error output above.")


$ git branch -M main

Pushing to origin main...
$ git push -u origin main
branch 'main' set up to track 'origin/main'.

Everything up-to-date

Push completed successfully.
