# Simple University Patent Analysis

## Streamlined analysis with minimal complexity

In [None]:
# Simple setup
import pandas as pd
import requests
import base64
import time
import json
import os
from datetime import datetime

print("✅ Setup complete")

In [None]:
# Load the DeepTechFinder export, trying UTF-8 first and falling back to latin-1.
# Only a decoding failure triggers the fallback; any other problem (for example
# a missing file) propagates immediately, so `df` is always bound when the
# summary lines below run.
csv_path = 'data/EPO_DeepTechFinder_20250513_DE_Uni_Top100.csv'
try:
    df = pd.read_csv(csv_path, encoding='utf-8')
    print(f"✅ Loaded {len(df)} records")
except UnicodeDecodeError:
    # latin-1 maps every byte to a character, so this read cannot fail on decoding.
    df = pd.read_csv(csv_path, encoding='latin-1')
    print(f"✅ Loaded {len(df)} records with latin-1")

print(f"Columns: {list(df.columns)}")
print(f"Universities: {df['University'].nunique()}")

In [None]:
# Choose the university to analyse, then narrow the data to its granted patents
university = "Karlsruhe Institute of Technology"

is_selected_university = df['University'].eq(university)
uni_data = df.loc[is_selected_university]

is_granted = uni_data['Patent_status'].eq('EP granted')
granted = uni_data.loc[is_granted]

print(f"University: {university}")
print(f"Total patents: {len(uni_data)}")
print(f"Granted patents: {len(granted)}")

# Keep only the first 10 granted patents as a small test sample
sample = granted.iloc[:10]
print(f"Sample size: {len(sample)}")

In [None]:
# Simple EPO OPS client
class SimpleOPSClient:
    """Minimal client for the EPO OPS REST service.

    Reads the consumer key and secret from a dotenv-style file, requests an
    access token on demand, and fetches bibliographic data for EP
    applications. Failures are reported with a printed message and a
    ``None`` return value rather than an exception, except for unusable
    credentials, which raise from the constructor.
    """

    AUTH_URL = "https://ops.epo.org/3.2/auth/accesstoken"
    BIBLIO_URL = (
        "https://ops.epo.org/3.2/rest-services/published-data/"
        "application/epodoc/EP{number}/biblio"
    )
    # Seconds to wait for the server before giving up on a request.
    REQUEST_TIMEOUT = 30
    # Refresh the token this many seconds before its reported expiry.
    TOKEN_EXPIRY_MARGIN = 60

    def __init__(self, env_path='../ipc-ops/.env'):
        """Load ``OPS_KEY`` and ``OPS_SECRET`` from *env_path*.

        Args:
            env_path: Path of a file containing ``OPS_KEY=...`` and
                ``OPS_SECRET=...`` lines. Defaults to the location the
                notebook has always used.

        Raises:
            OSError: If the file cannot be opened.
            ValueError: If either credential is missing or empty.
        """
        self.key = None
        self.secret = None

        with open(env_path, 'r', encoding='utf-8') as f:
            for line in f:
                # Split on the FIRST '=' only: secrets may themselves contain '='.
                name, sep, value = line.strip().partition('=')
                if not sep:
                    continue
                # Tolerate optional surrounding quotes, as dotenv files allow.
                value = value.strip().strip('"\'')
                name = name.strip()
                if name == 'OPS_KEY':
                    self.key = value
                elif name == 'OPS_SECRET':
                    self.secret = value

        if not self.key or not self.secret:
            raise ValueError(
                f"OPS_KEY and OPS_SECRET must both be set in {env_path}"
            )

        self.token = None
        # Epoch time after which the cached token is considered stale;
        # None means no expiry information is known.
        self._token_expires_at = None
        print("✅ Credentials loaded")

    @staticmethod
    def _clean_number(ep_number):
        """Return the bare digits of an EP number, or '' if there are none.

        Drops an optional leading ``EP`` and everything from the first
        non-digit onward, so a kind code such as ``A``, ``A1`` or ``B1`` is
        removed as a whole (``EP1234567B1`` -> ``1234567``).
        """
        number = str(ep_number).strip().upper()
        if number.startswith('EP'):
            number = number[2:]

        digits = []
        for char in number:
            if char not in '0123456789':
                break
            digits.append(char)
        return ''.join(digits)

    def get_token(self):
        """Return a valid access token, requesting a new one when needed.

        The cached token is reused until the expiry reported by the server
        (minus a safety margin) has passed. If the token response carries no
        ``expires_in`` value the token is cached indefinitely.

        Returns:
            The access token string, or ``None`` if it could not be obtained.
        """
        expires_at = getattr(self, '_token_expires_at', None)
        if self.token and (expires_at is None or time.time() < expires_at):
            return self.token

        # Any previously cached token is stale at this point.
        self.token = None

        credentials = f"{self.key}:{self.secret}"
        encoded = base64.b64encode(credentials.encode()).decode()
        headers = {
            'Authorization': f'Basic {encoded}',
            'Content-Type': 'application/x-www-form-urlencoded',
        }

        try:
            response = requests.post(
                self.AUTH_URL,
                headers=headers,
                data={'grant_type': 'client_credentials'},
                timeout=self.REQUEST_TIMEOUT,
            )
        except requests.RequestException as e:
            print(f"❌ Token error: {e}")
            return None

        if response.status_code != 200:
            print(f"❌ Token error: {response.status_code}")
            return None

        payload = response.json()
        self.token = payload['access_token']

        try:
            lifetime = float(payload.get('expires_in', 0))
        except (TypeError, ValueError):
            lifetime = 0
        if lifetime > 0:
            self._token_expires_at = (
                time.time() + lifetime - self.TOKEN_EXPIRY_MARGIN
            )
        else:
            self._token_expires_at = None

        print("✅ Token obtained")
        return self.token

    def get_patent(self, ep_number):
        """Fetch bibliographic data for one EP application.

        Args:
            ep_number: Application number, with or without the ``EP`` prefix
                and a trailing kind code.

        Returns:
            The decoded JSON response, or ``None`` when the number contains
            no digits, no token is available, the request fails, or the
            server answers with a status other than 200.
        """
        clean_num = self._clean_number(ep_number)
        if not clean_num:
            return None

        token = self.get_token()
        if not token:
            return None

        url = self.BIBLIO_URL.format(number=clean_num)
        headers = {
            'Authorization': f'Bearer {token}',
            'Accept': 'application/json',
        }

        try:
            response = requests.get(
                url, headers=headers, timeout=self.REQUEST_TIMEOUT
            )
        except requests.RequestException as e:
            print(f"❌ Request error for EP{clean_num}: {e}")
            return None

        if response.status_code == 200:
            return response.json()
        return None

# Shared client instance used by the cells below; constructing it reads the OPS credentials file.
ops = SimpleOPSClient()

In [None]:
# Test one patent: look up the first sample row to confirm EPO OPS access works.
# `result` stays None whenever the lookup cannot be attempted, so an empty
# sample or a malformed link produces a clear message instead of an IndexError.
result = None

if sample.empty:
    print("❌ No patents in sample to test")
else:
    test_row = sample.iloc[0]
    ep_link = test_row['Espacenet_link']
    # The patent number is whatever follows 'q=' in the Espacenet link;
    # a missing (NaN) link is not a string and yields no parts.
    link_parts = ep_link.split('q=') if isinstance(ep_link, str) else []

    if len(link_parts) < 2:
        print(f"❌ No patent number found in link: {ep_link!r}")
    else:
        ep_number = link_parts[1]
        print(f"Testing: {ep_number}")

        result = ops.get_patent(ep_number)
        if result:
            print("✅ EPO OPS working")
            print(f"Data keys: {list(result.keys())[:5]}")
        else:
            print("❌ EPO OPS failed")

In [None]:
# Simple analysis of sample patents: query EPO OPS once per sample row and
# record whether bibliographic data came back.
results = []

for idx, row in sample.iterrows():
    ep_link = row['Espacenet_link']
    # The patent number is whatever follows 'q=' in the Espacenet link;
    # a missing (NaN) link is not a string and yields no parts.
    link_parts = ep_link.split('q=') if isinstance(ep_link, str) else []

    if len(link_parts) < 2:
        # Keep one result row per sample row, but skip the request (and the
        # rate-limit pause) when there is no number to look up.
        print(f"Skipping row {idx}: no patent number in link {ep_link!r}")
        results.append({
            'ep_number': None,
            'title': row['Application_title'],
            'year': row['Filing_year'],
            'ops_data': 'No',
        })
        continue

    ep_number = link_parts[1]
    print(f"Processing {ep_number}...", end=" ")

    data = ops.get_patent(ep_number)
    results.append({
        'ep_number': ep_number,
        'title': row['Application_title'],
        'year': row['Filing_year'],
        'ops_data': 'Yes' if data else 'No',
    })
    print("✅" if data else "❌")

    time.sleep(2)  # Rate limiting

print(f"\nProcessed {len(results)} patents")
# An empty sample would otherwise divide by zero.
if results:
    success_count = sum(r['ops_data'] == 'Yes' for r in results)
    success_rate = success_count / len(results) * 100
else:
    success_rate = 0.0
print(f"Success rate: {success_rate:.1f}%")

In [None]:
# Simple results: show the per-patent outcome table and persist it as CSV.
results_df = pd.DataFrame(results)
print("Analysis Results:")
print(results_df)

# Save results; create the output directory first so a fresh checkout
# does not fail on the write.
os.makedirs('output', exist_ok=True)
results_df.to_csv('output/simple_analysis_results.csv', index=False)
print("\n✅ Results saved to output/simple_analysis_results.csv")