In [None]:
# Neural Graffiti Layer Injected Directly into Gemma (no retraining, live adapter mod)

!pip install transformers torch accelerate

import torch
import torch.nn as nn
import torch.nn.functional as F
from transformers import AutoModelForCausalLM, AutoTokenizer

# Hugging Face Token
# Access token forwarded to both `from_pretrained` calls below. It is an empty
# placeholder here.
# NOTE(review): presumably a real token must be filled in for gated checkpoints — confirm.
hf_token = ""

# Load Tokenizer
# Tokenizer for the same checkpoint name that the base model is loaded from.
tokenizer = AutoTokenizer.from_pretrained("google/gemma-3-1b-it", token=hf_token)

# Load Base Model
# Weights are requested in float16 and the returned model is switched to eval
# mode right away; the rest of this file only runs it for inference.
# NOTE(review): device_map="auto" presumably relies on the `accelerate` package
# installed above — confirm against the transformers documentation.
base_model = AutoModelForCausalLM.from_pretrained(
    "google/gemma-3-1b-it",
    torch_dtype=torch.float16,
    device_map="auto",
    token=hf_token
).eval()

# Neural Graffiti Spray Layer
# ---------------------------
# The SprayLayer is a simple dynamic state mechanism inspired by (but not replicating) liquid neural networks.
# It maintains a persistent internal state vector that is updated over time based on new input embeddings.
# The update rule is a form of neural feedback:
#     dx = -λ * (state - W(x))
# This allows the layer to evolve with each input, retaining a trace of prior interactions.
# The resulting "spray vector" reflects the model's internal memory drift and is used to modulate
# the hidden activations of the base transformer, injecting a sense of continuity and influence
# across otherwise disconnected prompts — like tagging each thought with memory residue.
class SprayLayer(nn.Module):
    """Stateful layer whose persistent vector drifts toward a projection of its input.

    Every call applies one step of the update rule

        dx    = -lambda * (state - W(x))
        state = state + dx

    so ``state`` moves a fraction ``lambda`` (initialised to 0.1 per dimension)
    of the way toward ``W(x)``.

    Attributes:
        W: Linear projection ``dim -> dim`` applied to the input.
        lambda_: Learnable per-dimension step size.
        state: Persistent buffer of shape ``(dim,)``, initialised to zeros and
            replaced on every forward call.
    """

    def __init__(self, dim):
        super().__init__()
        self.W = nn.Linear(dim, dim)
        self.lambda_ = nn.Parameter(torch.ones(dim) * 0.1)
        self.register_buffer('state', torch.zeros(dim))

    def forward(self, x):
        """Advance the internal state by one step and return the new state.

        Args:
            x: Input embedding; its last dimension must match ``dim``.

        Returns:
            The updated state tensor. When gradients are enabled it is still
            attached to the graph of *this* step, so ``W`` and ``lambda_`` can
            receive gradients from it.
        """
        dx = -self.lambda_ * (self.state - self.W(x))
        new_state = self.state + dx
        # Persist a detached copy: a buffer that keeps its autograd history
        # would chain every past call into one ever-growing graph and make a
        # second backward() fail on the already-freed graph of earlier steps.
        self.state = new_state.detach()
        return new_state

# Graffiti Adapter Module
class GraffitiAdapter(nn.Module):
    """Adds a scaled spray vector onto a tensor of hidden activations.

    Attributes:
        spray: A half-precision ``SprayLayer`` that turns a memory embedding
            into the spray vector (and advances its own state while doing so).
        alpha: Learnable half-precision scalar, initialised to 0.2, that
            scales the spray vector before it is added.
    """

    def __init__(self, hidden_size):
        super().__init__()
        self.spray = SprayLayer(hidden_size).half()
        self.alpha = nn.Parameter(torch.tensor(0.2).half())

    def forward(self, hidden, memory_embed):
        """Return ``hidden`` shifted by ``alpha`` times the spray vector.

        Args:
            hidden: Activations to modulate.
                # assumes (batch, seq, hidden_size) — TODO confirm against caller
            memory_embed: Embedding fed to the spray layer.

        Returns:
            ``hidden`` plus the broadcast, scaled spray vector.
        """
        residue = self.spray(memory_embed)
        # Two leading singleton axes let the vector broadcast over the
        # leading dimensions of `hidden`.
        broadcastable = residue[None, None]
        scaled = self.alpha * broadcastable
        return hidden + scaled

# Memory Functions
# Module-level store of past interactions. Each entry is an
# (embedding, text) tuple appended by `store_memory` and searched by
# `recall_memory`; nothing in this file ever removes entries.
memory_bank = []

def store_memory(embedding, text):
    """Append an ``(embedding, text)`` entry to the global ``memory_bank``.

    The embedding is detached and cloned first, so the stored tensor shares
    neither autograd history nor storage with the tensor passed in.

    Args:
        embedding: Tensor to remember.
        text: Text associated with the embedding.
    """
    snapshot = embedding.detach().clone()
    entry = (snapshot, text)
    memory_bank.append(entry)

def recall_memory(query_embedding, top_k=3):
    """Return the stored embeddings most similar to ``query_embedding``.

    Similarity is the cosine similarity along dimension 0 between the query
    and each embedding in the global ``memory_bank``.

    Args:
        query_embedding: 1-D tensor to compare against every stored embedding.
        top_k: Maximum number of embeddings to return.

    Returns:
        A list of at most ``top_k`` stored embedding tensors (not the
        associated texts), ordered from most to least similar. Entries with
        equal similarity keep their insertion order. The list is empty when
        nothing has been stored yet.
    """
    if not memory_bank:
        return []
    # Score with plain Python floats and sort on that key alone. Sorting
    # (similarity, embedding) tuples directly makes Python compare the
    # embedding tensors whenever two similarities tie, which raises
    # "Boolean value of Tensor with more than one value is ambiguous".
    scored = [
        (F.cosine_similarity(query_embedding, embedding, dim=0).item(), embedding)
        for embedding, _text in memory_bank
    ]
    scored.sort(key=lambda pair: pair[0], reverse=True)
    return [embedding for _score, embedding in scored[:top_k]]

def fuse_embeddings(current, recalled):
    """Average the current embedding with any recalled embeddings.

    Args:
        current: Embedding of the current input.
        recalled: List of previously stored embeddings; may be empty.

    Returns:
        ``current`` itself when ``recalled`` is empty, otherwise the
        element-wise mean of ``current`` and every tensor in ``recalled``.
    """
    if not recalled:
        return current
    stacked = torch.stack([current] + recalled)
    return stacked.mean(dim=0)

# Wrapped Gemma Model with Graffiti Adapter
class GraffitiWrappedModel(nn.Module):
    """Runs a base causal LM and optionally passes its hidden states through an adapter.

    Attributes:
        base_model: Object exposing ``.model`` (the backbone, called with
            ``output_hidden_states=True``) and ``.lm_head``.
        graffiti_adapter: Callable ``(hidden_states, memory_embed)`` that
            returns modulated hidden states.
    """

    def __init__(self, base_model, graffiti_adapter):
        super().__init__()
        self.base_model = base_model
        self.graffiti_adapter = graffiti_adapter

    def forward(self, input_ids, memory_embed=None, **kwargs):
        """Compute logits, applying the adapter when a memory embedding is given.

        Args:
            input_ids: Token ids forwarded to the backbone.
            memory_embed: Optional embedding handed to the adapter; when
                ``None`` the adapter is skipped entirely.
            **kwargs: Extra keyword arguments forwarded to the backbone.

        Returns:
            The output of ``lm_head`` applied to the last entry of the
            backbone's ``hidden_states`` (after the adapter, if used).
        """
        backbone_out = self.base_model.model(
            input_ids=input_ids, output_hidden_states=True, **kwargs
        )
        final_hidden = backbone_out.hidden_states[-1]

        if memory_embed is None:
            return self.base_model.lm_head(final_hidden)

        tagged_hidden = self.graffiti_adapter(final_hidden, memory_embed)
        return self.base_model.lm_head(tagged_hidden)

# Initialize Adapter and Model
# The adapter is sized from the base model's configured hidden size and moved
# to the base model's device so its tensors can be combined with the model's
# hidden states.
graffiti_adapter = GraffitiAdapter(hidden_size=base_model.config.hidden_size).to(base_model.device)
# Wrap base model and adapter together; eval() marks the wrapper as inference-only.
model = GraffitiWrappedModel(base_model, graffiti_adapter).eval()

# Graffiti Text Generator
@torch.no_grad()
def graffiti_generate(user_input, conversation_history="", max_new_tokens=100):
    """Greedily generate a reply to ``user_input`` with the graffiti adapter active.

    Steps:
      1. Build a chat-formatted prompt from the history and the new user turn.
      2. Run the backbone once and mean-pool the last hidden states over the
         sequence into a sentence embedding.
      3. Recall similar stored embeddings, average them with the sentence
         embedding, push the result through the spray layer, and store the
         resulting spray vector in the memory bank.
      4. Decode greedily (argmax) through the wrapped model, which applies
         the adapter with that spray vector on every step.

    Decoding stops as soon as the model emits the tokenizer's EOS token or
    ``<end_of_turn>``, or after ``max_new_tokens`` tokens, whichever comes
    first.

    Args:
        user_input: Text of the new user turn.
        conversation_history: Already-formatted earlier turns, prepended
            verbatim to the prompt.
        max_new_tokens: Upper bound on the number of generated tokens.

    Returns:
        The decoded reply with special tokens removed and surrounding
        whitespace stripped.
    """
    prompt = f"{conversation_history}<start_of_turn>user\n{user_input}\n<end_of_turn>\n<start_of_turn>model\n"
    inputs = tokenizer(prompt, return_tensors="pt").to(base_model.device)
    prompt_length = inputs.input_ids.shape[-1]

    outputs = base_model.model(**inputs, output_hidden_states=True)
    hidden = outputs.hidden_states[-1].squeeze(0)
    sentence_embedding = torch.mean(hidden, dim=0)
    recalled = recall_memory(sentence_embedding)
    fused = fuse_embeddings(sentence_embedding, recalled)
    spray_vector = graffiti_adapter.spray(fused)
    store_memory(spray_vector, user_input)

    print("\n🧠 Neural Graffiti injected into model.")
    print(f"   • Recalled memories: {len(recalled)}")
    print(f"   • Spray state: {graffiti_adapter.spray.state.mean().item():.4f}")

    # Token ids that end the model's turn. Without this check decoding always
    # ran for the full budget and kept emitting text past the end of the reply.
    stop_ids = set()
    if tokenizer.eos_token_id is not None:
        stop_ids.add(tokenizer.eos_token_id)
    end_of_turn_id = tokenizer.convert_tokens_to_ids("<end_of_turn>")
    # Ignore the lookup when the tokenizer does not know the token (it then
    # yields None or the unknown-token id rather than a dedicated id).
    if end_of_turn_id is not None and end_of_turn_id != tokenizer.unk_token_id:
        stop_ids.add(end_of_turn_id)

    generated = inputs.input_ids
    for _ in range(max_new_tokens):
        # NOTE: each call runs the adapter, which feeds `spray_vector` through
        # the spray layer again, so the spray state advances once per step.
        logits = model(input_ids=generated, memory_embed=spray_vector)
        next_token = torch.argmax(logits[:, -1, :], dim=-1, keepdim=True)
        # A single prompt is decoded at a time, so .item() is well defined.
        if next_token.item() in stop_ids:
            break
        generated = torch.cat([generated, next_token], dim=1)

    return tokenizer.decode(generated[0][prompt_length:], skip_special_tokens=True).strip()

# Graffiti Chat Loop
def graffiti_chat():
    """Run an interactive console chat until the user types ``quit``.

    Each user line is answered through ``graffiti_generate``; the formatted
    user and model turns are accumulated and passed back in as the
    conversation history for the next turn. The exit command is matched
    case-insensitively and ignoring surrounding whitespace.
    """
    print("\n🔁 Graffiti-Gemma Chat (Neural Graffiti mode) — type 'quit' to exit\n")
    past_turns = []

    while True:
        user_input = input("You: ")
        command = user_input.lower().strip()
        if command == 'quit':
            print("Goodbye!")
            return

        # The history string is rebuilt from the recorded turns on every round.
        response = graffiti_generate(user_input, "".join(past_turns))
        print(f"Graffiti-Gemma: {response}\n")

        past_turns.append(
            f"<start_of_turn>user\n{user_input}\n<end_of_turn>\n<start_of_turn>model\n{response}\n<end_of_turn>\n"
        )

# Start the interactive chat as soon as this cell/script is executed; the call
# blocks on console input until the user types 'quit'.
graffiti_chat()
