<a href="https://colab.research.google.com/github/cyberust/HumanSynergyAnalysis_EN/blob/main/HumanSynergyAnalysis_en.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
# ======================================================================
#
#  Human Synergy Analysis System V3 - Final Stable Version (English)
#
#  Features:
#  - Centralized parameter management via the CONFIG object.
#  - Consistent execution via a single analysis pipeline.
#  - Decoupled visualization and file saving to prevent resource issues.
#
# ======================================================================

# ===============================================================
# STEP 1: Library Installation
# ===============================================================
!pip install sentence-transformers pandas numpy scipy networkx plotly scikit-learn --quiet
print("✅ Libraries installed successfully.")

# ===============================================================
# STEP 2: Library Imports
# ===============================================================
import os
import re
import pandas as pd
import numpy as np
import networkx as nx
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
import warnings
warnings.filterwarnings('ignore')
import time # Import time for adding delays

from scipy.integrate import odeint
from sklearn.metrics.pairwise import cosine_similarity
from sentence_transformers import SentenceTransformer
from datetime import datetime
from google.colab import drive
print("✅ Libraries imported successfully.")


# ===============================================================
# STEP 3: Centralized Analysis Configuration
# ===============================================================
# Single source of truth for every tunable value; each analysis class receives
# this dict and picks out its own section by key.
CONFIG = {
    'data': {
        # Profiles are read from "<base_path><name>_profile.txt" (plain string
        # concatenation, so base_path must end with a path separator).
        'base_path': "/content/drive/MyDrive/",
        # 'names' are file-name stems; 'display_names' are the matching labels
        # (paired positionally) used as dictionary keys and chart labels.
        'names': ["arale_cohen", "yanay_geva", "yasuyuki_sakane"],
        'display_names': ["Arale Cohen", "Yanay Geva", "Yasuyuki Sakane"]
    },
    'profile_analysis': {
        # Model identifier handed to SentenceTransformer.
        'embedding_model': 'all-MiniLM-L6-v2',
        # Used when no "<N> years" pattern is found in a profile.
        'experience_default': 10,
        # Case-insensitive substring hits for these keywords are counted, multiplied
        # by 'network_multiplier' and capped at 100 to give the network strength.
        'network_keywords': ['network', 'ecosystem', 'connections', 'board member', 'partner'],
        'network_multiplier': 20
    },
    'synergy_calculation': {
        # Coefficients of the linear combination of the three pairwise matrices.
        # NOTE(review): these sum to 0.9, not 1.0 — confirm this is intended.
        'weights': {
            'complementarity': 0.5,
            'experience': 0.2,
            'network': 0.2
        },
        # Factor applied to a pair's score when their cultural backgrounds differ.
        'diversity_bonus': 1.2,
        # Looked up by display name.
        'cultural_backgrounds': {'Arale Cohen': 'Western', 'Yanay Geva': 'Western', 'Yasuyuki Sakane': 'Eastern'}
    },
    'dynamic_model': {
        # 'alpha', 'K' and 'initial_state' are combined element-wise with the
        # simulation state, so each needs exactly one entry per team member
        # (three here).
        'alpha': np.array([0.4, 0.35, 0.45]),
        # The synergy matrix is multiplied by this to obtain the coupling matrix.
        'beta_multiplier': 0.1,
        'K': np.array([12.0, 12.0, 12.0]),
        'initial_state': [1.0, 1.0, 1.0],
        # Simulation runs from t=0 to this value with 4 sample points per unit;
        # the charts label the axis as months.
        'time_horizon': 24
    },
    'game_theory': {
        # Scales the pairwise synergy part of a coalition's value.
        'synergy_value_multiplier': 5
    },
    'business_forecast': {
        # Multipliers that turn the simulated state and pairwise synergy into the
        # 'revenue_growth_rate' and 'innovation_index' series.
        'revenue_base_multiplier': 10,
        'revenue_synergy_multiplier': 5,
        'innovation_synergy_multiplier': 10,
    },
    'output': {
        # Results are written to "<base_path>/<folder_prefix><run timestamp>".
        'base_path': "/content/drive/MyDrive/",
        'folder_prefix': "synergy_semantic_analysis_"
    }
}
print("✅ Analysis configuration defined.")


# ===============================================================
# STEP 4: Definition of Analysis Classes
# ===============================================================

class DataLoader:
    """Mounts Google Drive and reads one profile text file per team member."""

    def __init__(self, config):
        # Only the 'data' section of the global configuration is used here.
        self.config = config['data']

    def load_profiles(self):
        """Mount Google Drive and load every configured profile.

        Returns:
            dict: display name -> profile text. A member whose file is missing
            gets the placeholder text "Profile data not available." and a
            warning is printed.
        """
        mountpoint = '/content/drive'

        # Unmount first if Drive is already attached; the mount below is forced anyway.
        if os.path.exists(mountpoint) and os.path.ismount(mountpoint):
            try:
                drive.flush_and_unmount()
                print("✅ Drive unmounted successfully.")
            except Exception as e:
                print(f"⚠️ Error during unmount: {e}")

        self._clear_stale_mountpoint(mountpoint)

        # Mount the drive
        drive.mount(mountpoint, force_remount=True)
        print("✅ Drive mounted successfully.")

        profiles = {}
        for name, display_name in zip(self.config['names'], self.config['display_names']):
            profile_path = f"{self.config['base_path']}{name}_profile.txt"
            try:
                with open(profile_path, 'r', encoding='utf-8') as f:
                    profiles[display_name] = f.read()
            except FileNotFoundError:
                # Keep the pipeline running, but make the substitution visible.
                print(f"⚠️ Profile file not found, using placeholder text: '{profile_path}'")
                profiles[display_name] = "Profile data not available."
        return profiles

    def _clear_stale_mountpoint(self, mountpoint):
        """Remove leftover local entries from the mountpoint directory.

        Nothing is deleted while `mountpoint` is still a live mount: if the
        unmount failed, its entries are the user's real Drive contents.
        """
        import shutil

        if not os.path.isdir(mountpoint) or os.path.ismount(mountpoint):
            return
        try:
            # os.listdir also returns dotfiles, which a shell glob would skip.
            for entry in os.listdir(mountpoint):
                entry_path = os.path.join(mountpoint, entry)
                if os.path.isdir(entry_path) and not os.path.islink(entry_path):
                    shutil.rmtree(entry_path)
                else:
                    os.remove(entry_path)
            print("✅ Cleaned up mountpoint.")
        except Exception as e:
            print(f"⚠️ Error during mountpoint cleanup: {e}")

class SemanticProfileAnalyzer:
    """Turns raw profile texts into an embedding plus two heuristic scores."""

    def __init__(self, config):
        self.config = config['profile_analysis']
        self.model = SentenceTransformer(self.config['embedding_model'])

    def analyze(self, profiles_dict):
        """Return, per member, the embedding, experience years and network strength.

        Args:
            profiles_dict: mapping of member name -> profile text.

        Returns:
            dict: member name -> {'embedding', 'experience_years', 'network_strength'}.
        """
        member_names = list(profiles_dict.keys())
        profile_texts = list(profiles_dict.values())
        # All texts are encoded in a single call.
        vectors = self.model.encode(profile_texts, show_progress_bar=False)

        return {
            member: {
                'embedding': vector,
                'experience_years': self._extract_experience(text),
                'network_strength': self._extract_network_strength(text)
            }
            for member, text, vector in zip(member_names, profile_texts, vectors)
        }

    def _extract_experience(self, text):
        """Largest number found in front of "year(s)", else the configured default."""
        year_counts = [int(number) for number in re.findall(r'(\d+)\+?\s*years?', text.lower())]
        if not year_counts:
            return self.config['experience_default']
        return max(year_counts)

    def _extract_network_strength(self, text):
        """Keyword hit count times the configured multiplier, capped at 100."""
        lowered = text.lower()
        hits = 0
        for keyword in self.config['network_keywords']:
            hits += lowered.count(keyword)
        return min(hits * self.config['network_multiplier'], 100)

class SynergyCalculator:
    """Combines pairwise complementarity, experience and network scores."""

    def __init__(self, config):
        self.config = config['synergy_calculation']

    def calculate(self, analysis_data):
        """Build the pairwise synergy matrix for all members.

        Args:
            analysis_data: member name -> dict holding 'embedding',
                'experience_years' and 'network_strength'.

        Returns:
            dict with 'total_synergy' (weighted sum of the three component
            matrices, diversity bonus applied, diagonal zeroed) and
            'skill_complementarity' (1 - cosine similarity of the embeddings).
        """
        members = list(analysis_data.keys())
        team_size = len(members)
        # Every ordered pair of distinct members, in row-major order.
        off_diagonal = [(row, col) for row in range(team_size) for col in range(team_size) if row != col]

        vectors = np.array([analysis_data[member]['embedding'] for member in members])
        comp_matrix = 1 - cosine_similarity(vectors)

        # Experience term: decays exponentially with the gap in years.
        years = [analysis_data[member]['experience_years'] for member in members]
        exp_matrix = np.zeros((team_size, team_size))
        for row, col in off_diagonal:
            exp_matrix[row, col] = np.exp(-0.1 * abs(years[row] - years[col]))

        # Network term: product of the two strengths, divided by 10000.
        reach = [analysis_data[member]['network_strength'] for member in members]
        net_matrix = np.zeros((team_size, team_size))
        for row, col in off_diagonal:
            net_matrix[row, col] = (reach[row] * reach[col]) / 10000.0

        weights = self.config['weights']
        total_synergy = (weights['complementarity'] * comp_matrix +
                         weights['experience'] * exp_matrix +
                         weights['network'] * net_matrix)

        # Pairs whose configured backgrounds differ receive the diversity bonus.
        backgrounds = self.config['cultural_backgrounds']
        labels = [backgrounds.get(member) for member in members]
        for row, col in off_diagonal:
            if labels[row] != labels[col]:
                total_synergy[row, col] *= self.config['diversity_bonus']

        np.fill_diagonal(total_synergy, 0)
        return {'total_synergy': total_synergy, 'skill_complementarity': comp_matrix}

class DynamicSystemModel:
    """Integrates a coupled, capacity-limited growth model over time."""

    def __init__(self, config):
        self.config = config['dynamic_model']

    def simulate(self, synergy_matrix):
        """Integrate the growth equations and return (time points, trajectory).

        The synergy matrix, scaled by 'beta_multiplier', is the coupling between
        members. The time grid spans 0..time_horizon with time_horizon * 4 points.
        """
        params = self.config
        horizon = params['time_horizon']
        coupling = synergy_matrix * params['beta_multiplier']
        time_points = np.linspace(0, horizon, horizon * 4)
        trajectory = odeint(
            self._system_dynamics,
            params['initial_state'],
            time_points,
            args=(params['alpha'], coupling, params['K']),
        )
        return time_points, trajectory

    def _system_dynamics(self, state, t, alpha, beta, K):
        """Right-hand side of the ODE: (alpha*x + beta.x) * (1 - x/K)."""
        levels = np.array(state)
        growth_rate = alpha * levels + np.dot(beta, levels)
        headroom = 1 - levels / K
        return growth_rate * headroom

class GameTheoryAnalyzer:
    """Attributes coalition value to individual members via Shapley values."""

    def __init__(self, config):
        self.config = config['game_theory']

    def calculate_shapley(self, analysis_data, synergy_matrix):
        """Return an array with one Shapley value per member, in `analysis_data` key order."""
        from math import factorial
        from itertools import combinations

        player_count = len(analysis_data)
        players = list(range(player_count))
        shapley_values = np.zeros(player_count)

        # Coalition values are cached: the same coalition is needed for several players.
        cache = {}

        def worth(coalition):
            if coalition not in cache:
                cache[coalition] = self._get_coalition_value(coalition, analysis_data, synergy_matrix)
            return cache[coalition]

        for player in players:
            others = [candidate for candidate in players if candidate != player]
            for size in range(player_count):
                # Standard Shapley weight |S|! (n - |S| - 1)! / n! for coalitions of this size.
                weight = (factorial(size) * factorial(player_count - size - 1)) / factorial(player_count)
                for subset in combinations(others, size):
                    without_player = tuple(sorted(subset))
                    value_without = worth(without_player)

                    with_player = tuple(sorted(without_player + (player,)))
                    value_with = worth(with_player)

                    shapley_values[player] += weight * (value_with - value_without)

        return shapley_values

    def _get_coalition_value(self, indices, analysis_data, synergy_matrix):
        """Value of the coalition given by member `indices` (0 for the empty coalition)."""
        if not indices:
            return 0

        member_names = list(analysis_data.keys())

        # Base value: sum of the L2 norms of the members' embedding vectors.
        # NOTE(review): if the embedding model returns unit-length vectors, this
        # term is the same for every member — confirm it differentiates people.
        base_value = 0
        for idx in indices:
            base_value += np.linalg.norm(analysis_data[member_names[idx]]['embedding'])

        # Synergy inside the coalition: every ordered pair (a, b) with a != b is added,
        # so a symmetric matrix contributes each pair twice.
        synergy_value = 0
        for a in indices:
            for b in indices:
                if a != b:
                    synergy_value += synergy_matrix[a, b]

        return base_value + synergy_value * self.config['synergy_value_multiplier']

class VisualizationGenerator:
    """Builds the Plotly figure objects for the dashboard (no display, no file I/O)."""

    # Fixed seed so the force-directed network layout is the same on every run.
    LAYOUT_SEED = 42
    # Smallest node marker size, so a member with zero network strength stays visible.
    MIN_NODE_SIZE = 6
    # Node colours; reused cyclically when the team has more members than colours.
    NODE_PALETTE = ['#FF6B6B', '#4ECDC4', '#45B7D1']

    def __init__(self, config):
        # Only the 'data' section (for the display names) is needed.
        self.config = config['data']

    def generate_all_figures(self, results):
        """Create every chart from the pipeline results.

        Args:
            results: dict holding 'synergy', 'simulation_solution',
                'simulation_time_axis', 'business_forecast', 'shapley_values'
                and 'analysis'.

        Returns:
            dict: figure key -> Plotly figure, with keys 'synergy_heatmap',
            'growth_trajectory', 'business_dashboard', 'shapley_values' and
            'network_graph'.
        """
        figures = {}
        names = self.config['display_names']

        figures['synergy_heatmap'] = px.imshow(results['synergy']['total_synergy'], x=names, y=names, text_auto='.3f',
                                               labels=dict(x="Member", y="Member", color="Synergy Score"), color_continuous_scale='Blues',
                                               title='V3: Overall Synergy Matrix')

        df_growth = pd.DataFrame(results['simulation_solution'], columns=names)
        df_growth['Time (Months)'] = results['simulation_time_axis']
        figures['growth_trajectory'] = px.line(df_growth, x='Time (Months)', y=names, title='V3: Dynamic Growth Trajectory Simulation')

        df_biz = results['business_forecast']
        fig_biz = px.line(df_biz, x='time_months', y=['revenue_growth_rate', 'innovation_index'],
                          title='V3: Business Metrics Forecast', facet_row="variable", labels={"variable":"", "time_months": "Time (Months)"})
        # The two metrics live on different scales, so each facet gets its own y-axis range.
        fig_biz.update_yaxes(matches=None)
        figures['business_dashboard'] = fig_biz

        figures['shapley_values'] = px.bar(x=names, y=results['shapley_values'], text_auto='.2f',
                                           labels={'x':'Member', 'y':'Value Contribution'}, title='V3: Value Contribution (Shapley Values)')

        figures['network_graph'] = self._build_network_figure(results, names)

        return figures

    def _build_network_figure(self, results, names):
        """Draw the team as a graph: edge width follows synergy, node size follows network strength."""
        G = nx.from_numpy_array(results['synergy']['total_synergy'])
        # Seeded so repeated runs produce the same picture.
        pos = nx.spring_layout(G, k=1.5, iterations=50, seed=self.LAYOUT_SEED)

        # One trace per edge, because line width is a per-trace property.
        edge_traces = [go.Scatter(x=[pos[edge[0]][0], pos[edge[1]][0], None], y=[pos[edge[0]][1], pos[edge[1]][1], None],
                                  mode='lines', line=dict(width=edge[2]['weight']*10, color='#888'),
                                  hoverinfo='text', text=f"Synergy: {edge[2]['weight']:.3f}")
                       for edge in G.edges(data=True)]

        node_sizes = [max(results['analysis'][n]['network_strength']/5, self.MIN_NODE_SIZE) for n in names]
        node_colors = [self.NODE_PALETTE[i % len(self.NODE_PALETTE)] for i in range(len(names))]
        node_trace = go.Scatter(x=[pos[n][0] for n in G.nodes()], y=[pos[n][1] for n in G.nodes()], mode='markers+text',
                                text=[names[i] for i in G.nodes()], textposition="top center",
                                marker=dict(size=node_sizes, color=node_colors))

        return go.Figure(data=edge_traces + [node_trace], layout=go.Layout(title='V3: Team Interaction Network', showlegend=False,
                                                                          xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
                                                                          yaxis=dict(showgrid=False, zeroline=False, showticklabels=False)))

# ===============================================================
# STEP 5: Definition and Execution of the Analysis Pipeline
# ===============================================================

class AnalysisPipeline:
    """Runs the whole analysis once: load, embed, score, simulate, attribute, plot, save."""

    def __init__(self, config):
        self.config = config
        self.data_loader = DataLoader(config)
        self.profile_analyzer = SemanticProfileAnalyzer(config)
        self.synergy_calculator = SynergyCalculator(config)
        self.dynamic_model = DynamicSystemModel(config)
        self.game_theory_analyzer = GameTheoryAnalyzer(config)
        self.viz_generator = VisualizationGenerator(config)
        # Used for the output folder name and the report title.
        self.run_timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

    def run(self):
        """Execute every stage and return a dict with all inputs, intermediate data and figures."""
        print("--- Starting analysis pipeline ---")

        # Load profiles first: this mounts Google Drive. Creating the output folder
        # before the mount would put it on local disk under the mountpoint, where it
        # is removed or hidden by the mount and the later save fails.
        profiles = self.data_loader.load_profiles()
        print(f"✅ Loaded {len(profiles)} profiles.")

        output_folder_path = os.path.join(self.config['output']['base_path'], f"{self.config['output']['folder_prefix']}{self.run_timestamp}")
        os.makedirs(output_folder_path, exist_ok=True)
        print(f"✅ Output directory prepared: '{output_folder_path}'")
        print("   Pausing for 5 seconds for Google Drive synchronization...")
        time.sleep(5)
        print("   ...Resuming execution.")

        analysis_data = self.profile_analyzer.analyze(profiles)
        print("✅ Profile analysis (semantic embedding) complete.")
        synergy_results = self.synergy_calculator.calculate(analysis_data)
        print("✅ Synergy matrix calculation complete.")
        time_axis, solution = self.dynamic_model.simulate(synergy_results['total_synergy'])
        print("✅ Dynamic growth simulation complete.")
        shapley_values = self.game_theory_analyzer.calculate_shapley(analysis_data, synergy_results['total_synergy'])
        print("✅ Shapley value calculation complete.")

        business_forecast_df = self._forecast_business_metrics(time_axis, solution, synergy_results['total_synergy'])
        print("✅ Business metrics forecast complete.")

        results = {'config': self.config, 'profiles': profiles, 'analysis': analysis_data, 'synergy': synergy_results,
                   'simulation_time_axis': time_axis, 'simulation_solution': solution, 'shapley_values': shapley_values,
                   'business_forecast': business_forecast_df}

        results['figures'] = self.viz_generator.generate_all_figures(results)
        print("✅ All graph figure objects generated.")

        self._save_artifacts(results, output_folder_path)
        print("\n--- ✅ Pipeline execution complete ---")
        return results

    def _forecast_business_metrics(self, time_axis, solution, total_synergy):
        """Derive the 'revenue_growth_rate' and 'innovation_index' series from the simulation."""
        c_biz = self.config['business_forecast']
        perf_data = []
        for step in range(len(time_axis)):
            state = solution[step]
            # Synergy-weighted sum over member pairs; halved because the matrix
            # holds every pair twice. Computed once and shared by both metrics.
            pair_synergy = np.sum(total_synergy * np.outer(state, state)) / 2
            perf_data.append({
                'time_months': time_axis[step],
                'revenue_growth_rate': np.sum(state) * c_biz['revenue_base_multiplier'] + pair_synergy * c_biz['revenue_synergy_multiplier'],
                'innovation_index': pair_synergy * c_biz['innovation_synergy_multiplier'],
            })
        return pd.DataFrame(perf_data)

    @staticmethod
    def _plotlyjs_cdn_url():
        """URL of the plotly.js build that matches the installed plotly package.

        "plotly-latest" on the CDN is frozen at an old 1.x release, so figures
        serialised by a current plotly may render incorrectly with it. It is
        kept only as a fallback when the bundled version cannot be determined.
        """
        try:
            from plotly.offline import get_plotlyjs_version
            return f"https://cdn.plot.ly/plotly-{get_plotlyjs_version()}.min.js"
        except Exception:
            return "https://cdn.plot.ly/plotly-latest.min.js"

    @staticmethod
    def _matrix_to_markdown(matrix, names):
        """Render a square matrix as a Markdown table labelled with `names` on both axes."""
        frame = pd.DataFrame(matrix, columns=names, index=names)
        try:
            return frame.to_markdown()
        except ImportError:
            # to_markdown needs the optional 'tabulate' package; fall back to a
            # fixed-width table in a code fence so the report is still written.
            return "```\n" + frame.to_string() + "\n```"

    def _create_combined_html(self, figures_dict):
        """Generates a single HTML string containing all graphs and their explanations."""

        # Explanations for each chart, targeted at a business/VC audience.
        explanations = {
            'synergy_heatmap': """
                <p><strong>What this shows:</strong> The calculated synergy score between each pair of team members. A higher score (and darker color) indicates a stronger potential for collaboration.</p>
                <p><strong>How to read it:</strong> This score is a composite metric derived from semantic analysis of skill complementarity, experience similarity, network effects, and a cultural diversity bonus. It helps identify which partnerships are likely to be most fruitful.</p>
            """,
            'growth_trajectory': """
                <p><strong>What this shows:</strong> A 24-month simulation of each member's potential performance growth. This model assumes that growth is driven by individual abilities and boosted by synergistic interactions with their teammates.</p>
                <p><strong>How to read it:</strong> The S-shaped curve represents a realistic growth path that eventually reaches a stable plateau (the 'Carrying Capacity'). This demonstrates the team's long-term potential and the speed at which they can reach peak effectiveness, avoiding unrealistic assumptions of infinite growth.</p>
            """,
            'business_dashboard': """
                <p><strong>What this shows:</strong> A forecast of high-level Key Performance Indicators (KPIs) derived from the team's simulated performance.</p>
                <p><strong>How to read it:</strong> 'revenue_growth_rate' and 'innovation_index' are abstract metrics representing the team's potential impact on top-line growth and new value creation, respectively. The key insight lies in the trend and scale of these projections, translating team synergy into tangible business outcomes.</p>
            """,
            'shapley_values': """
                <p><strong>What this shows:</strong> A fair allocation of the team's total potential value to each member, calculated using the Shapley value method from cooperative game theory.</p>
                <p><strong>How to read it:</strong> A higher value indicates that the member's presence provides a greater marginal contribution to any possible subgroup. This value is derived from their individual profile's semantic richness and their synergistic potential, helping to identify key contributors to the team's success.</p>
            """,
            'network_graph': """
                <p><strong>What this shows:</strong> A visual summary of the team's internal structure and dynamics.</p>
                <p><strong>How to read it:</strong> Each person is a <strong>node (circle)</strong>, with its size corresponding to their Network Strength score. The <strong>edges (lines)</strong> connecting them represent their potential for collaboration, with the line thickness corresponding to their Overall Synergy Score.</p>
            """
        }

        # HTML header and style definitions. The script URL is substituted afterwards
        # (not via an f-string) because the CSS below is full of literal braces.
        html_string = """
        <html><head><title>V3 Human Synergy Analysis Dashboard</title>
        <script src="__PLOTLY_JS_URL__"></script>
        <style>
            body { font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, 'Helvetica Neue', Arial, sans-serif; margin: 2em; color: #333; }
            .chart-container { border: 1px solid #e0e0e0; border-radius: 8px; margin-bottom: 25px; padding: 15px; box-shadow: 0 2px 4px rgba(0,0,0,0.05); overflow: hidden; }
            h1 { color: #1a1a1a; }
            h2 { color: #3a3a3a; border-bottom: 2px solid #f0f0f0; padding-bottom: 8px; }
            .explanation { background-color: #f8f9fa; border-left: 4px solid #0d6efd; padding: 10px 15px; margin-top: 15px; font-size: 14px; line-height: 1.6; }
            .explanation p { margin: 5px 0; }
        </style>
        </head><body><h1>Human Synergy Analysis Dashboard V3</h1>""".replace("__PLOTLY_JS_URL__", self._plotlyjs_cdn_url())

        # Add a container and explanation for each graph
        for name, fig in figures_dict.items():
            title = fig.layout.title.text if fig.layout.title.text else name
            explanation_html = explanations.get(name, "<p>No description available.</p>")

            html_string += f'<div class="chart-container"><h2>{title}</h2>'
            html_string += f'<div id="{name}" style="width:100%; min-height:500px;"></div>'
            html_string += f'<div class="explanation">{explanation_html}</div></div>'

        # Add the JavaScript to render each graph into the div carrying its key as id
        html_string += "<script>\n" + "".join([f"Plotly.newPlot('{name}', {fig.to_json()});\n" for name, fig in figures_dict.items()]) + "</script></body></html>"
        return html_string

    def _save_artifacts(self, results, output_folder_path):
        """Write the combined HTML dashboard and the Markdown report into `output_folder_path`."""
        # 1. Save the combined HTML dashboard
        combined_html_content = self._create_combined_html(results['figures'])
        dashboard_filename = "synergy_dashboard_V3.html"
        dashboard_full_path = os.path.join(output_folder_path, dashboard_filename)
        try:
            with open(dashboard_full_path, 'w', encoding='utf-8') as f: f.write(combined_html_content)
            print(f"✅ Combined dashboard saved as HTML to '{dashboard_full_path}'")
        except Exception as e: print(f"⚠️ Error saving combined dashboard: {e}")

        # 2. Save the Markdown report
        names = self.config['data']['display_names']
        report_str = f"# Human Synergy Analysis Report V3 ({self.run_timestamp})\n\n"
        report_str += "## 1. Overall Synergy Analysis\n" + "The overall synergy matrix is as follows:\n" + self._matrix_to_markdown(results['synergy']['total_synergy'], names) + "\n\n"
        report_str += "## 2. Skill Complementarity Analysis (Semantic-based)\n" + "Semantic complementarity of profiles (closer to 1 is more complementary):\n" + self._matrix_to_markdown(results['synergy']['skill_complementarity'], names) + "\n\n"
        report_str += "## 3. Value Contribution Analysis (Shapley Values)\n" + "Each member's contribution to the total value created by the team is as follows:\n" + "".join([f"- {name}: {val:.3f}\n" for name, val in zip(names, results['shapley_values'])]) + "\n"
        report_str += "## 4. Summary\n" + "This analysis (V3) calculates more realistic synergy and contribution values by directly comparing the semantic content of each member's profile.\n" + "Notably, skill complementarity is a result of capturing the nuances of the entire text, rather than simple keyword matching.\n"

        report_filename = "analysis_report_V3.md"
        report_full_path = os.path.join(output_folder_path, report_filename)
        with open(report_full_path, 'w', encoding='utf-8') as f: f.write(report_str)
        print(f"✅ Detailed analysis report saved to '{report_full_path}'")

# Execute the pipeline
# Builds every analysis component from CONFIG, then runs the full workflow once.
# `results` is the dictionary returned by AnalysisPipeline.run() (profiles,
# analysis data, synergy matrices, simulation output, Shapley values, forecast
# DataFrame and the Plotly figures); it stays available as a notebook variable.
pipeline = AnalysisPipeline(CONFIG)
results = pipeline.run()

✅ Libraries installed successfully.
✅ Libraries imported successfully.
✅ Analysis configuration defined.
--- Starting analysis pipeline ---
✅ Output directory prepared: '/content/drive/MyDrive/synergy_semantic_analysis_20250620_231828'
   Pausing for 5 seconds for Google Drive synchronization...
   ...Resuming execution.
✅ Drive unmounted successfully.
Mounted at /content/drive
✅ Drive mounted successfully.
✅ Loaded 3 profiles.
✅ Profile analysis (semantic embedding) complete.
✅ Synergy matrix calculation complete.
✅ Dynamic growth simulation complete.
✅ Shapley value calculation complete.
✅ Business metrics forecast complete.
✅ All graph figure objects generated.
✅ Combined dashboard saved as HTML to '/content/drive/MyDrive/synergy_semantic_analysis_20250620_231828/synergy_dashboard_V3.html'
✅ Detailed analysis report saved to '/content/drive/MyDrive/synergy_semantic_analysis_20250620_231828/analysis_report_V3.md'

--- ✅ Pipeline execution complete ---
