<a href="https://colab.research.google.com/github/hasancsesu/Civil_Engineering_LLM_Assistant/blob/main/Civil_Eng_Q%26A_Asst_New1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Cell 1: Definitive Version Synced for L4 GPU
!pip install -q -U numpy==1.26.4
!pip install -q -U fsspec==2024.9.0
!pip install -q -U triton
!pip install -q -U bitsandbytes==0.45.1
!pip install -q -U transformers==4.46.2
!pip install -q -U peft==0.12.0
!pip install -q -U accelerate==0.34.2
!pip install -q -U datasets==3.1.0
!pip install -q -U trl==0.12.0

In [None]:
# Cell 2: Verify Triton and the GPU, then log in to the Hugging Face Hub
import torch
from huggingface_hub import notebook_login

# Triton is optional at import time here; report its absence instead of crashing.
try:
    import triton
    print(f"Triton version: {triton.__version__} detected.")
except ImportError:
    print("Triton still missing!")

# Check that a CUDA device is actually attached before asking about bf16, so a
# CPU-only runtime gets a clear message instead of failing later during model load.
gpu_available = torch.cuda.is_available()
if gpu_available:
    print(f"GPU detected: {torch.cuda.get_device_name(0)}")
else:
    print("No CUDA GPU detected! Switch the runtime to a GPU before continuing.")

print(f"Bfloat16 Hardware Support: {gpu_available and torch.cuda.is_bf16_supported()}")

# Interactive prompt for a Hugging Face access token (no token is stored in the notebook).
notebook_login()

In [None]:
# Cell 3: Loading Model with Triton/BnB Integration
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

model_id = "mistralai/Mistral-7B-v0.1"

# 4-bit NF4 quantization with double quantization; matmuls are computed in bfloat16.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)

# `trust_remote_code` is deliberately not set: Mistral is implemented inside
# transformers itself, so there is no reason to allow code from the Hub repo to run.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=bnb_config,
    device_map="auto",
    attn_implementation="sdpa", #  SDPA requirement
)

tokenizer = AutoTokenizer.from_pretrained(model_id)
# Pad with <unk> rather than </s>: the language-modeling collator masks every label
# equal to the pad token id, so padding with EOS would also mask the real EOS at the
# end of each answer and the model would not learn when to stop generating.
tokenizer.pad_token = tokenizer.unk_token
tokenizer.padding_side = "right"

print("\n✅ Success! Model loaded on L4 without Triton errors.")

In [None]:
# Cell 4: LoRA Configuration for Mistral-7B
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training

# 1. Prepare model for 4-bit training. This helper already enables gradient
#    checkpointing for k-bit models, so no separate
#    `model.gradient_checkpointing_enable()` call is needed beforehand.
# NOTE: this cell rebinds `model`; re-running it would wrap the already wrapped
#       model again, so reload the model (Cell 3) before re-running.
model = prepare_model_for_kbit_training(model, use_gradient_checkpointing=True)

# 2. Defining LoRA parameters
# 'r' is the rank. 16 is a good balance between memory and learning capacity.
# The adapters are attached to every attention and MLP projection layer.
peft_config = LoraConfig(
    r=16,
    lora_alpha=32,
    target_modules=[
        "q_proj",
        "k_proj",
        "v_proj",
        "o_proj",
        "gate_proj",
        "up_proj",
        "down_proj"
    ],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)

# 3. Applying the LoRA adapter
model = get_peft_model(model, peft_config)

# 4. Verification of how many parameters we are actually training
model.print_trainable_parameters()

In [None]:
# Cell 5: Dataset Loading and Prompt Formatting
from datasets import load_dataset

# OPTION A: Loading a local JSONL file (uncomment if you have your file uploaded)
# dataset = load_dataset("json", data_files="civil_engineering_data.jsonl", split="train")

# OPTION B: Example formatting function
def formatting_prompts_func(example):
    """Format a batch of records into Mistral-style instruction strings.

    Args:
        example: A batch mapping column names to equal-length lists. It must
            contain 'instruction' and either 'response' or 'output'. 'response'
            wins when both are present, so batches that worked before are
            formatted exactly as before; 'output' is the key used by the
            records defined in this notebook.

    Returns:
        A list with one "<s>[INST] ... [/INST] ...</s>" string per record.

    Raises:
        KeyError: If 'instruction' is missing, or neither answer column exists.
    """
    answer_key = 'response' if 'response' in example else 'output'
    # Standard Mistral-style instruction format
    return [
        f"<s>[INST] {instruction} [/INST] {answer}</s>"
        for instruction, answer in zip(example['instruction'], example[answer_key])
    ]

print("Dataset logic initialized. Ready to map your data.")

In [None]:
# Cell 6: Defining Data and Forcing the 'text' column
from datasets import Dataset

# 1. My training data (Named as example here)
# Each record has the keys "instruction", "input" (empty for every record here) and "output".
# Cleaned so that no field carries a stray "instruction:" / ":" prefix or
# leading/trailing whitespace, since every character ends up in the training text.
# The BOD definition that was filed under "What is chlorination?" now has its own
# "What is BOD?" record, and the chlorination question has a matching answer.
example = [
    {
        "instruction": "What is bearing capacity of soil?",
        "input": "",
        "output": "Bearing capacity is the maximum load per unit area which the soil can carry safely without shear failure and without excessive settlement. It is a key concept in foundation design."
    },
    {
        "instruction": "Explain the difference between dead loads and live loads in structural engineering.",
        "input": "",
        "output": "Dead loads are permanent, static loads such as self-weight of structural elements, finishes, and fixed equipment. Live loads are variable or moving loads such as occupants, furniture, vehicles, and stored materials, which may change over time."
    },
    {
        "instruction": "What is the purpose of reinforcement in reinforced concrete?",
        "input": "",
        "output": "Reinforcement, usually steel bars or meshes, is provided in concrete to resist tensile and sometimes compressive stresses. Concrete is strong in compression but weak in tension, so reinforcement allows the composite material to safely carry bending and tensile forces."
    },
    {
        "instruction": "Briefly explain the difference between working stress design and limit state design.",
        "input": "",
        "output": "Working stress design is based on elastic behavior and uses a factor of safety applied to permissible stresses. Limit state design considers ultimate strength and serviceability limit states separately and applies partial safety factors to loads and material strengths, providing a more rational and economical design."
    },
    {
        "instruction": "What is a shear failure in soil, and why is it important in foundation design?",
        "input": "",
        "output": "Shear failure in soil occurs when the stresses induced by loads exceed the shear strength of the soil, causing a slip or failure surface. It is crucial in foundation design because exceeding the soil’s shear strength can lead to sudden and catastrophic settlement or tilting of structures."
    },
    {
        "instruction": "Define slump test and its significance in concrete work.",
        "input": "",
        "output": "The slump test is a simple field test used to measure the consistency and workability of fresh concrete. A standard cone is filled with concrete, lifted, and the decrease in height (slump) is measured. Proper slump ensures the concrete is neither too stiff nor too fluid for placement and compaction."
    },
    {
        "instruction": "What is the difference between flexible pavement and rigid pavement?",
        "input": "",
        "output": "Flexible pavement, usually made of asphalt layers, distributes loads through a layered system and deforms under traffic. Rigid pavement, made of concrete slabs, has high flexural strength and distributes loads over a wider area with relatively little deformation. Flexible pavements are more sensitive to subgrade quality, while rigid pavements are more durable but costlier."
    },
    {
        "instruction": "Explain freeboard in the context of hydraulic structures.",
        "input": "",
        "output": "Freeboard is the vertical distance between the maximum design water level and the top of a channel, dam, or embankment. It provides a safety margin against wave action, settlement, and unforeseen increases in water level."
    },
    {
        "instruction": "What is factor of safety in geotechnical engineering?",
        "input": "",
        "output": "Factor of safety is the ratio of the ultimate capacity (e.g., ultimate bearing capacity or resisting moment) to the actual working load or effect. It accounts for uncertainties in soil properties, loading, and analysis methods, ensuring a margin of safety against failure."
    },
    {
        "instruction": "As a civil engineer, how would you briefly advise a client about controlling cracks in reinforced concrete slabs?",
        "input": "",
        "output": "Crack control in reinforced concrete slabs involves proper detailing of reinforcement (adequate bar spacing and cover), using appropriate concrete mix with controlled water–cement ratio, proper curing, and avoiding restraint conditions and rapid temperature or moisture changes. Good construction practices and joint detailing also help limit crack width."
    },
    {
        "instruction": "What is the ultimate bearing capacity of soil?",
        "input": "",
        "output": "The ultimate bearing capacity is the maximum pressure a soil can sustain just before shear failure occurs. It represents the theoretical upper limit of stress that can be applied to the soil under a foundation."
    },
    {
        "instruction": "List the primary factors that influence the bearing capacity of soil.",
        "input": "",
        "output": "Key factors include soil type, density, shear strength parameters (cohesion and angle of internal friction), groundwater conditions, footing shape and size, depth of foundation, and load inclination."
    },
    {
        "instruction": "What are the types of shear failure in soils?",
        "input": "",
        "output": "The three primary types are: 1. General shear failure – distinct failure surface, sudden collapse. 2. Local shear failure – partial failure, no well-defined surface. 3. Punching shear failure – occurs in loose or soft soils under heavily loaded small foundations."
    },
    {
        "instruction": "What is swelling soil?",
        "input": "",
        "output": "Swelling soils, also called expansive soils, contain minerals such as montmorillonite that absorb water and expand significantly, causing uplift, cracking, and structural damage."
    },
    {
        "instruction": "What is consolidation in soil mechanics?",
        "input": "",
        "output": "Consolidation is the gradual reduction in soil volume caused by expulsion of pore water under sustained load. It governs long-term settlement in clayey soils."
    },
    {
        "instruction": "How is safe bearing capacity defined?",
        "input": "",
        "output": "Safe bearing capacity is the maximum pressure that can be safely applied to the soil without causing shear failure and while keeping settlement within acceptable limits. It is computed by dividing ultimate capacity by a factor of safety."
    },
    {
        "instruction": "Differentiate between compaction and consolidation.",
        "input": "",
        "output": "Compaction is the reduction in soil volume by expelling air through mechanical effort (instant). Consolidation is the reduction of volume by water expulsion under load (time-dependent)."
    },
    {
        "instruction": "What does the angle of internal friction represent?",
        "input": "",
        "output": "It represents the shear resistance provided by soil due to interparticle friction. Sands have high friction angles, while clays typically have low values."
    },
    {
        "instruction": "Differentiate between shallow and deep foundations.",
        "input": "",
        "output": "Shallow foundations transmit loads to the soil at relatively small depths (1–3m). Examples: isolated footings, raft foundations. Deep foundations transfer loads to deeper, stronger layers (10–60m). Examples: piles, drilled shafts."
    },
    {
        "instruction": "List common types of piles used in civil engineering.",
        "input": "",
        "output": "Common types include friction piles, end-bearing piles, composite piles, timber piles, steel H-piles, precast concrete piles, and bored cast-in-situ piles."
    },
    {
        "instruction": "What is negative skin friction on piles?",
        "input": "",
        "output": "Negative skin friction occurs when surrounding soil settles more than the pile, dragging the pile downward and adding additional load. It often happens in soft clays or areas of soil consolidation."
    },
    {
        "instruction": "When is a raft foundation preferred?",
        "input": "",
        "output": "Rafts are used when soil is weak, differential settlement must be minimized, or building columns are closely spaced. They provide uniform load distribution."
    },
    {
        "instruction": "What is a pile cap?",
        "input": "",
        "output": "A pile cap is a reinforced concrete block that ties multiple piles together and transfers structural loads to the pile group."
    },
    {
        "instruction": "What are the types of settlement in foundations?",
        "input": "",
        "output": "Immediate settlement (elastic), consolidation settlement (time-dependent), and secondary compression settlement."
    },
    {
        "instruction": "What is reinforced concrete?",
        "input": "",
        "output": "Reinforced concrete is concrete that contains steel bars or mesh to resist tension and improve overall strength."
    },
    {
        "instruction": "What is cover in RCC?",
        "input": "",
        "output": "Cover is the minimum thickness of concrete over reinforcement to protect steel from corrosion and fire."
    },
    {
        "instruction": "Define slump",
        "input": "",
        "output": "Slump is a simple field test that measures the workability of fresh concrete."
    },
    {
        "instruction": "What causes cracks in concrete?",
        "input": "",
        "output": "Cracks can be caused by shrinkage, temperature changes, poor curing, excessive loading, or insufficient reinforcement."
    },
    {
        "instruction": "What is curing?",
        "input": "",
        "output": "Curing is the process of keeping concrete moist to ensure proper strength development."
    },
    {
        "instruction": "What is a one-way slab?",
        "input": "",
        "output": "A one-way slab mainly bends in one direction because it is supported on two opposite sides."
    },
    {
        "instruction": "Why is steel used in buildings?",
        "input": "",
        "output": "Steel is strong, durable, and can carry large loads with relatively small sections."
    },
    {
        "instruction": "What is structural steel?",
        "input": "",
        "output": "Structural steel is steel specifically shaped and treated for use in building frameworks."
    },
    {
        "instruction": "What is buckling?",
        "input": "",
        "output": "Buckling is the sudden sideways failure of a structural member under compressive load."
    },
    {
        "instruction": "What is a truss?",
        "input": "",
        "output": "A truss is a framework of connected members arranged in triangles to efficiently carry loads."
    },
    {
        "instruction": "What is corrosion?",
        "input": "",
        "output": "Corrosion is the chemical deterioration of steel due to moisture and oxygen."
    },
    {
        "instruction": "What is a steel column?",
        "input": "",
        "output": "A steel column is a vertical member designed to carry compressive loads."
    },
    {
        "instruction": "What is welding?",
        "input": "",
        "output": "Welding is the process of joining metal parts by melting and fusing them."
    },
    {
        "instruction": "What is a flexible pavement?",
        "input": "",
        "output": "Flexible pavement is made of asphalt layers and distributes load through gradual deformation."
    },
    {
        "instruction": "What is a rigid pavement?",
        "input": "",
        "output": "Rigid pavement uses concrete slabs that distribute load over a wide area with minimal deformation."
    },
    {
        "instruction": "What is a camber?",
        "input": "",
        "output": "Camber is the upward slope of a road surface to drain rainwater."
    },
    {
        "instruction": "What is traffic volume?",
        "input": "",
        "output": "Traffic volume is the number of vehicles passing a point in a specified time."
    },
    {
        "instruction": "What is a median?",
        "input": "",
        "output": "A median is a divider between opposite lanes to improve safety."
    },
    {
        "instruction": "What is a super-elevation?",
        "input": "",
        "output": "Super-elevation is the raising of the outer edge of a road on a curve to counter vehicle overturning."
    },
    {
        "instruction": "What is grade?",
        "input": "",
        "output": "Grade is the slope of a road measured as a percentage."
    },
    {
        "instruction": "What is a pothole?",
        "input": "",
        "output": "A pothole is a depression in the road surface caused by water and repeated traffic loads."
    },
    {
        "instruction": "What is bitumen?",
        "input": "",
        "output": "Bitumen is a black, sticky binder used in asphalt pavement."
    },
    {
        "instruction": "What is discharge?",
        "input": "",
        "output": "Discharge is the volume of water flowing through a channel per unit time."
    },
    {
        "instruction": "What is a weir?",
        "input": "",
        "output": "A weir is a small barrier across a river used to measure or control flow."
    },
    {
        "instruction": "What is freeboard?",
        "input": "",
        "output": "Freeboard is the vertical gap between water level and the top of a structure."
    },
    {
        "instruction": "What is runoff?",
        "input": "",
        "output": "Runoff is the portion of rainfall that flows over the ground surface."
    },
    {
        "instruction": "What is infiltration?",
        "input": "",
        "output": "Infiltration is the process of water entering the soil."
    },
    {
        "instruction": "What is a channel?",
        "input": "",
        "output": "A channel is a natural or artificial path through which water flows."
    },
    {
        "instruction": "What is erosion?",
        "input": "",
        "output": "Erosion is the removal of soil by flowing water or wind."
    },
    {
        "instruction": "What is a hydrograph?",
        "input": "",
        "output": "A hydrograph is a graph that shows how river flow varies with time."
    },
    {
        "instruction": "What is a project schedule?",
        "input": "",
        "output": "A project schedule shows the sequence and timing of construction activities."
    },
    {
        "instruction": "What is cost estimation?",
        "input": "",
        "output": "Cost estimation predicts the expenses required for a construction project."
    },
    {
        "instruction": "What is quality control?",
        "input": "",
        "output": "Quality control ensures materials and work meet required standards."
    },
    {
        "instruction": "What is safety management?",
        "input": "",
        "output": "Safety management focuses on preventing accidents on construction sites."
    },
    {
        "instruction": "What is procurement?",
        "input": "",
        "output": "Procurement is the process of purchasing materials and services for a project."
    },
    {
        "instruction": "What is a contract?",
        "input": "",
        "output": "A contract is a legal agreement defining responsibilities and obligations."
    },
    {
        "instruction": "What is a BOQ?",
        "input": "",
        "output": "A BOQ is a Bill of Quantities listing materials and their amounts."
    },
    {
        "instruction": "What is wastewater?",
        "input": "",
        "output": "Wastewater is used water containing impurities from homes or industries."
    },
    {
        "instruction": "What is a septic tank?",
        "input": "",
        "output": "A septic tank is an underground chamber that treats household wastewater."
    },
    {
        "instruction": "What is water treatment?",
        "input": "",
        "output": "Water treatment removes contaminants to make water safe for use."
    },
    {
        "instruction": "What is solid waste?",
        "input": "",
        "output": "Solid waste is any garbage or refuse produced by human activity."
    },
    {
        "instruction": "What is air pollution?",
        "input": "",
        "output": "Air pollution occurs when harmful substances enter the atmosphere."
    },
    {
        "instruction": "What is chlorination?",
        "input": "",
        "output": "Chlorination is the addition of chlorine to water to disinfect it by killing harmful microorganisms."
    },
    {
        "instruction": "What is BOD?",
        "input": "",
        "output": "BOD is the amount of oxygen required by microorganisms to break down organic matter."
    },
    {
        "instruction": "What is a load?",
        "input": "",
        "output": "A load is any external force applied to a structure."
    },
    {
        "instruction": "What is bending moment?",
        "input": "",
        "output": "Bending moment is the internal force that causes a beam to bend."
    },
    {
        "instruction": "What is shear force?",
        "input": "",
        "output": "Shear force is the force that acts parallel to the cross-section of a beam."
    },
    {
        "instruction": "What is a simply supported beam?",
        "input": "",
        "output": "A simply supported beam is supported at both ends but free to rotate."
    },
    {
        "instruction": "What is a fixed support?",
        "input": "",
        "output": "A fixed support restricts all movements of a structural member."
    },
    {
        "instruction": "What is deflection?",
        "input": "",
        "output": "Deflection is the vertical displacement of a structural element under load."
    },
    {
        "instruction": "What is stiffness?",
        "input": "",
        "output": "Stiffness is a structure’s resistance to deformation."
    },
    {
        "instruction": "What is a frame?",
        "input": "",
        "output": "A frame is a structure with beams and columns connected together."
    },
    {
        "instruction": "What is leveling?",
        "input": "",
        "output": "Leveling is the process of determining height differences between points."
    },
    {
        "instruction": "What is a benchmark?",
        "input": "",
        "output": "A benchmark is a known elevation point used as a reference."
    },
    {
        "instruction": "What is a total station?",
        "input": "",
        "output": "A total station is an electronic device used to measure angles and distances."
    },
    {
        "instruction": "What is GPS in surveying?",
        "input": "",
        "output": "GPS is a satellite-based system used to determine precise locations."
    },
    {
        "instruction": "What is contouring?",
        "input": "",
        "output": "Contouring involves connecting points of equal elevation on a map."
    },
    {
        "instruction": "What is a traverse?",
        "input": "",
        "output": "A traverse is a series of connected survey lines whose lengths and angles are measured."
    },
    {
        "instruction": "What is EDM?",
        "input": "",
        "output": "EDM stands for Electronic Distance Measurement used for accurate distance readings."
    },
    {
        "instruction": "What is a theodolite?",
        "input": "",
        "output": "A theodolite is an instrument used to measure horizontal and vertical angles."
    }
]

# 2. Converting to Dataset object: one row per record dict in `example`, with the
#    dict keys ("instruction", "input", "output") as columns.
train_dataset = Dataset.from_list(example)

def apply_template(sample):
    """Render one record as a single Mistral-style training string.

    Surrounding whitespace is stripped from every field before formatting, so
    a record such as {"instruction": " What is a truss?", ...} cannot produce
    doubled spaces after "[INST]" / "[/INST]" or a space before "</s>".

    Args:
        sample: Mapping with the keys 'instruction', 'input' and 'output'.
            'input' may be empty or None, in which case no context line is added.

    Returns:
        {"text": "<s>[INST] <instruction>[\\nContext: <input>] [/INST] <output></s>"}

    NOTE(review): the literal "<s>" is kept so the training text matches the
    prompt used in the inference cell. If the tokenizer also prepends its own
    BOS token, each sequence starts with two BOS tokens - confirm and, if so,
    change both places together.
    """
    instruction = sample['instruction'].strip()
    context = (sample['input'] or "").strip()
    answer = sample['output'].strip()

    full_prompt = instruction
    if context:
        full_prompt += f"\nContext: {context}"
    return {"text": f"<s>[INST] {full_prompt} [/INST] {answer}</s>"}

# 3. Add the "text" column produced by apply_template; the original columns are kept.
train_dataset = train_dataset.map(apply_template)
print("✅ train_dataset is now ready in memory.")

In [None]:
# Cell 7: Launching the Training for Civil_Eng_Q&A_Asst_2026
from trl import SFTTrainer, SFTConfig

sft_config = SFTConfig(
    output_dir="./civil_eng_mistral_2026",
    max_seq_length=2048,
    dataset_text_field="text",
    packing=False,
    per_device_train_batch_size=4,
    gradient_accumulation_steps=4,   # effective batch size of 16 per device
    learning_rate=2e-4,
    bf16=True, # L4 Native precision
    logging_steps=1,
    num_train_epochs=5,
    # Checkpoint once per epoch. With under a hundred examples and an effective
    # batch of 16, the whole run is only about 25 optimizer steps on one GPU, so
    # a step-based schedule with save_steps=50 would never write a checkpoint.
    save_strategy="epoch",
    save_total_limit=2,              # keep only the two most recent checkpoints
    report_to="none"
)

trainer = SFTTrainer(
    model=model,
    train_dataset=train_dataset,
    args=sft_config,
    # Pass the tokenizer configured in Cell 3 explicitly. Without it the trainer
    # loads a fresh tokenizer from the model name and ignores the pad token and
    # padding side chosen above.
    processing_class=tokenizer,
    peft_config=peft_config,
)

print("🚀 Training starting... Loop broken.")
trainer.train()

In [None]:
# Cell 8: Inference Test
import torch

prompt = "<s>[INST] What is the primary purpose of a 'Slump Test' in concrete? [/INST]"

# Switch off dropout (LoRA dropout is 0.05 during training) before generating.
model.eval()

# Send the inputs to wherever the model actually lives instead of assuming "cuda".
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

# Sampling is stochastic; seed it so a re-run reproduces the same answer.
torch.manual_seed(42)
with torch.no_grad():
    outputs = model.generate(
        **inputs,
        max_new_tokens=150,
        do_sample=True,      # without this, `temperature` is ignored and decoding is greedy
        temperature=0.7,
        pad_token_id=tokenizer.pad_token_id,
    )

print("\n--- Assistant Response ---")
print(tokenizer.decode(outputs[0], skip_special_tokens=True))

In [None]:
# Cell 9: Saving the LoRA Adapter locally
# The adapter weights and the tokenizer files go into the same folder so the
# folder can be loaded on its own later.
adapter_dir = "./civil_eng_mistral_adapter"
model.save_pretrained(adapter_dir)
tokenizer.save_pretrained(adapter_dir)
print(f"✅ Adapter saved to the folder: {adapter_dir}")

In [None]:
#Cell 10: Permanent Storage in Google drive
from google.colab import drive
import shutil
import os

# 1. Mounting Google Drive
drive.mount('/content/drive')

# 2. Source folder (written by Cell 9) and destination path in your Drive.
# The destination lives in a folder named 'AI_Models' in your MyDrive.
local_adapter_dir = "./civil_eng_mistral_adapter"
drive_path = "/content/drive/MyDrive/AI_Models/civil_eng_mistral_adapter"

# 3. Creating the parent directory if it doesn't exist
os.makedirs(os.path.dirname(drive_path), exist_ok=True)

# 4. Copying the folder from Colab's temporary storage to your Drive.
# `isdir` (not `exists`) because copytree needs a directory as its source;
# `dirs_exist_ok=True` lets a re-run overwrite an earlier copy.
if os.path.isdir(local_adapter_dir):
    shutil.copytree(local_adapter_dir, drive_path, dirs_exist_ok=True)
    print(f"✅ Success! Your adapter is now safely stored at: {drive_path}")
else:
    print("❌ Error: Local adapter folder not found. Did you run Cell 9?")

Mounted at /content/drive
✅ Success! Your adapter is now safely stored at: /content/drive/MyDrive/AI_Models/civil_eng_mistral_adapter


In [None]:
# Mount Google Drive at /content/drive.
# NOTE(review): Cell 10 already mounts Drive at this same path, and the output
# recorded for this cell reports "Drive already mounted", so in a top-to-bottom
# run this cell does nothing new. It is only needed when Cell 10 was skipped.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
