In [2]:
import cv2
import numpy as np
import torch
import torchvision.transforms as transforms
from torchvision.models.video import r3d_18
from collections import deque
import time

class FootballActionRecognizer:
    def __init__(self):
        # Load pre-trained 3D ResNet model
        self.model = r3d_18(pretrained=True)
        self.model.eval()
        
        # Transform for preprocessing video frames
        self.transform = transforms.Compose([
            transforms.ToPILImage(),
            transforms.Resize((112, 112)),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.43216, 0.394666, 0.37645], 
                               std=[0.22803, 0.22145, 0.216989])
        ])
        
        # Buffer to store frames for temporal analysis
        self.frame_buffer = deque(maxlen=16)  # 16 frames for 3D CNN
        
        # Football-specific action classes (simplified)
        self.actions = [
            "running", "kicking", "passing", "dribbling", "tackling",
            "jumping", "heading", "goalkeeping", "celebrating", "walking"
        ]
        
        # Object detection for players (using YOLO-like detection)
        self.player_detector = cv2.HOGDescriptor()
        self.player_detector.setSVMDetector(cv2.HOGDescriptor_getDefaultPeopleDetector())
        
    def detect_players(self, frame):
        """Detect players in the frame using HOG descriptor"""
        try:
            # Convert to RGB if needed
            if len(frame.shape) == 3 and frame.shape[2] == 3:
                detection_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            else:
                detection_frame = frame
                
            players, weights = self.player_detector.detectMultiScale(
                detection_frame, winStride=(8, 8), padding=(32, 32), scale=1.05
            )
            return players
        except Exception as e:
            print(f"Player detection error: {e}")
            return []
    
    def analyze_motion(self, prev_frame, curr_frame):
        """Analyze motion between frames using frame difference"""
        if prev_frame is None:
            return "stationary"
        
        # Convert to grayscale
        prev_gray = cv2.cvtColor(prev_frame, cv2.COLOR_BGR2GRAY)
        curr_gray = cv2.cvtColor(curr_frame, cv2.COLOR_BGR2GRAY)
        
        # Calculate frame difference
        diff = cv2.absdiff(prev_gray, curr_gray)
        
        # Apply threshold to get binary image
        _, thresh = cv2.threshold(diff, 25, 255, cv2.THRESH_BINARY)
        
        # Calculate motion as percentage of changed pixels
        motion_pixels = cv2.countNonZero(thresh)
        total_pixels = thresh.shape[0] * thresh.shape[1]
        motion_percentage = (motion_pixels / total_pixels) * 100
        
        # Classify motion based on percentage of changed pixels
        if motion_percentage > 15:
            return "fast_movement"
        elif motion_percentage > 8:
            return "moderate_movement"  
        elif motion_percentage > 3:
            return "slow_movement"
        else:
            return "stationary"
    
    def classify_action(self, frame_sequence):
        """Classify action from frame sequence"""
        if len(frame_sequence) < 8:
            return "analyzing"
        
        # Simple heuristic-based classification for football actions
        # In practice, you'd use a trained model on football-specific data
        
        # Analyze frame differences and patterns
        motion_scores = []
        for i in range(1, len(frame_sequence)):
            diff = cv2.absdiff(frame_sequence[i-1], frame_sequence[i])
            motion_score = np.mean(diff)
            motion_scores.append(motion_score)
        
        avg_motion = np.mean(motion_scores)
        motion_variance = np.var(motion_scores)
        
        # Classify based on motion patterns
        if avg_motion > 30 and motion_variance > 100:
            return "kicking_or_tackling"
        elif avg_motion > 20:
            return "running_or_dribbling"
        elif avg_motion > 10:
            return "walking_or_positioning"
        else:
            return "stationary_or_waiting"
    
    def generate_description(self, players, action, motion_type, frame_count):
        """Generate natural language description"""
        timestamp = frame_count / 30.0  # Assuming 30 FPS
        
        descriptions = []
        
        # Player count description
        if len(players) > 0:
            descriptions.append(f"At {timestamp:.1f}s: {len(players)} player(s) detected")
        
        # Action description
        action_descriptions = {
            "kicking_or_tackling": "Players engaged in intense action - likely kicking ball or tackling",
            "running_or_dribbling": "Players running across the field, possibly dribbling",
            "walking_or_positioning": "Players moving into position or walking",
            "stationary_or_waiting": "Players standing or waiting for play to develop",
            "analyzing": "Analyzing player movements..."
        }
        
        if action in action_descriptions:
            descriptions.append(action_descriptions[action])
        
        # Motion description
        motion_descriptions = {
            "fast_movement": "with rapid movement detected",
            "moderate_movement": "with moderate pace movement",
            "slow_movement": "with slow, controlled movement",
            "stationary": "with minimal movement"
        }
        
        if motion_type in motion_descriptions:
            descriptions.append(motion_descriptions[motion_type])
        
        return " ".join(descriptions)

def analyze_football_video(video_path):
    """Main function to analyze football video"""
    recognizer = FootballActionRecognizer()
    cap = cv2.VideoCapture(video_path)
    
    if not cap.isOpened():
        print(f"Error: Could not open video file {video_path}")
        return
    
    fps = cap.get(cv2.CAP_PROP_FPS)
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    duration = total_frames / fps
    
    print(f"Analyzing football video:")
    print(f"Duration: {duration:.1f} seconds")
    print(f"FPS: {fps}")
    print(f"Total frames: {total_frames}")
    print("-" * 50)
    
    frame_count = 0
    prev_frame = None
    frame_sequence = deque(maxlen=8)
    
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            
            frame_count += 1
            
            # Resize frame for processing
            processed_frame = cv2.resize(frame, (640, 360))
            frame_sequence.append(processed_frame)
            
            # Process every 12 frames (every 0.5 seconds at 24fps)
            if frame_count % 12 == 0:
                try:
                    # Detect players
                    players = recognizer.detect_players(processed_frame)
                    
                    # Analyze motion
                    motion_type = recognizer.analyze_motion(prev_frame, processed_frame)
                    
                    # Classify action
                    action = recognizer.classify_action(list(frame_sequence))
                    
                    # Generate description
                    description = recognizer.generate_description(
                        players, action, motion_type, frame_count
                    )
                    
                    print(description)
                    
                except Exception as e:
                    print(f"Processing error at frame {frame_count}: {e}")
                    continue
            
            prev_frame = processed_frame.copy()
            
            # Show progress
            if frame_count % 48 == 0:  # Every 2 seconds at 24fps
                progress = (frame_count / total_frames) * 100
                print(f"Progress: {progress:.1f}%")
                
    except Exception as e:
        print(f"Error during video processing: {e}")
    finally:
        cap.release()
        print(f"\nProcessed {frame_count} frames")
        print("Video analysis complete!")

# Usage
if __name__ == "__main__":
    video_path = r"C:\Users\Hello\Downloads\2932301-uhd_4096_2160_24fps.mp4"
    
    print("Football Video Action Recognition System")
    print("=" * 50)
    
    try:
        analyze_football_video(video_path)
    except Exception as e:
        print(f"Error analyzing video: {str(e)}")
        print("\nTroubleshooting tips:")
        print("1. Make sure the video file exists at the specified path")
        print("2. Install required packages: pip install opencv-python torch torchvision")
        print("3. Ensure the video format is supported by OpenCV")

Football Video Action Recognition System
Analyzing football video:
Duration: 15.3 seconds
FPS: 24.0
Total frames: 367
--------------------------------------------------
At 0.4s: 2 player(s) detected Players standing or waiting for play to develop with slow, controlled movement
At 0.8s: 1 player(s) detected Players standing or waiting for play to develop with moderate pace movement
At 1.2s: 3 player(s) detected Players moving into position or walking with moderate pace movement
At 1.6s: 3 player(s) detected Players moving into position or walking with moderate pace movement
Progress: 13.1%
At 2.0s: 2 player(s) detected Players moving into position or walking with rapid movement detected
At 2.4s: 2 player(s) detected Players moving into position or walking with rapid movement detected
At 2.8s: 2 player(s) detected Players moving into position or walking with rapid movement detected
At 3.2s: 2 player(s) detected Players running across the field, possibly dribbling with rapid movement dete

In [6]:
import cv2
import numpy as np
import torch
import torchvision.transforms as transforms
from torchvision.models.video import r3d_18
from collections import deque
import time

class FootballActionRecognizer:
    def __init__(self):
        # Load pre-trained 3D ResNet model
        self.model = r3d_18(pretrained=True)
        self.model.eval()
        
        # Transform for preprocessing video frames
        self.transform = transforms.Compose([
            transforms.ToPILImage(),
            transforms.Resize((112, 112)),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.43216, 0.394666, 0.37645], 
                               std=[0.22803, 0.22145, 0.216989])
        ])
        
        # Buffer to store frames for temporal analysis
        self.frame_buffer = deque(maxlen=16)  # 16 frames for 3D CNN
        
        # Football-specific action classes (simplified)
        self.actions = [
            "running", "kicking", "passing", "dribbling", "tackling",
            "jumping", "heading", "goalkeeping", "celebrating", "walking"
        ]
        
        # Object detection for players (using YOLO-like detection)
        self.player_detector = cv2.HOGDescriptor()
        self.player_detector.setSVMDetector(cv2.HOGDescriptor_getDefaultPeopleDetector())
        
    def detect_players(self, frame):
        """Detect players in the frame using HOG descriptor"""
        try:
            # Convert to RGB if needed
            if len(frame.shape) == 3 and frame.shape[2] == 3:
                detection_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            else:
                detection_frame = frame
                
            players, weights = self.player_detector.detectMultiScale(
                detection_frame, winStride=(8, 8), padding=(32, 32), scale=1.05
            )
            return players
        except Exception as e:
            print(f"Player detection error: {e}")
            return []
    
    def analyze_motion(self, prev_frame, curr_frame):
        """Analyze motion between frames using frame difference"""
        if prev_frame is None:
            return "stationary"
        
        # Convert to grayscale
        prev_gray = cv2.cvtColor(prev_frame, cv2.COLOR_BGR2GRAY)
        curr_gray = cv2.cvtColor(curr_frame, cv2.COLOR_BGR2GRAY)
        
        # Calculate frame difference
        diff = cv2.absdiff(prev_gray, curr_gray)
        
        # Apply threshold to get binary image
        _, thresh = cv2.threshold(diff, 25, 255, cv2.THRESH_BINARY)
        
        # Calculate motion as percentage of changed pixels
        motion_pixels = cv2.countNonZero(thresh)
        total_pixels = thresh.shape[0] * thresh.shape[1]
        motion_percentage = (motion_pixels / total_pixels) * 100
        
        # Classify motion based on percentage of changed pixels
        if motion_percentage > 15:
            return "fast_movement"
        elif motion_percentage > 8:
            return "moderate_movement"  
        elif motion_percentage > 3:
            return "slow_movement"
        else:
            return "stationary"
    
    def classify_action(self, frame_sequence):
        """Classify action from frame sequence"""
        if len(frame_sequence) < 8:
            return "analyzing"
        
        # Simple heuristic-based classification for football actions
        # In practice, you'd use a trained model on football-specific data
        
        # Analyze frame differences and patterns
        motion_scores = []
        for i in range(1, len(frame_sequence)):
            diff = cv2.absdiff(frame_sequence[i-1], frame_sequence[i])
            motion_score = np.mean(diff)
            motion_scores.append(motion_score)
        
        avg_motion = np.mean(motion_scores)
        motion_variance = np.var(motion_scores)
        
        # Classify based on motion patterns
        if avg_motion > 30 and motion_variance > 100:
            return "kicking_or_tackling"
        elif avg_motion > 20:
            return "running_or_dribbling"
        elif avg_motion > 10:
            return "walking_or_positioning"
        else:
            return "stationary_or_waiting"
    
    def generate_description(self, players, action, motion_type, frame_count):
        """Generate natural language description"""
        timestamp = frame_count / 30.0  # Assuming 30 FPS
        
        descriptions = []
        
        # Player count description
        if len(players) > 0:
            descriptions.append(f"At {timestamp:.1f}s: {len(players)} player(s) detected")
        
        # Action description
        action_descriptions = {
            "kicking_or_tackling": "Players engaged in intense action - likely kicking ball or tackling",
            "running_or_dribbling": "Players running across the field, possibly dribbling",
            "walking_or_positioning": "Players moving into position or walking",
            "stationary_or_waiting": "Players standing or waiting for play to develop",
            "analyzing": "Analyzing player movements..."
        }
        
        if action in action_descriptions:
            descriptions.append(action_descriptions[action])
        
        # Motion description
        motion_descriptions = {
            "fast_movement": "with rapid movement detected",
            "moderate_movement": "with moderate pace movement",
            "slow_movement": "with slow, controlled movement",
            "stationary": "with minimal movement"
        }
        
        if motion_type in motion_descriptions:
            descriptions.append(motion_descriptions[motion_type])
        
        return " ".join(descriptions)

def analyze_football_video(video_path):
    """Main function to analyze football video with display"""
    recognizer = FootballActionRecognizer()
    cap = cv2.VideoCapture(video_path)
    
    if not cap.isOpened():
        print(f"Error: Could not open video file {video_path}")
        return
    
    fps = cap.get(cv2.CAP_PROP_FPS)
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    duration = total_frames / fps
    
    print(f"Analyzing football video:")
    print(f"Duration: {duration:.1f} seconds")
    print(f"FPS: {fps}")
    print(f"Total frames: {total_frames}")
    print("Press 'q' to quit, 'space' to pause/resume")
    print("-" * 50)
    
    frame_count = 0
    prev_frame = None
    frame_sequence = deque(maxlen=8)
    
    # Variables to store current analysis results
    current_players = []
    current_action = "analyzing"
    current_motion = "stationary"
    current_description = "Starting analysis..."
    
    paused = False
    
    try:
        while True:
            if not paused:
                ret, frame = cap.read()
                if not ret:
                    break
                
                frame_count += 1
                
                # Resize frame for processing
                processed_frame = cv2.resize(frame, (640, 360))
                frame_sequence.append(processed_frame)
                
                # Process every 12 frames (every 0.5 seconds at 24fps)
                if frame_count % 12 == 0:
                    try:
                        # Detect players
                        current_players = recognizer.detect_players(processed_frame)
                        
                        # Analyze motion
                        current_motion = recognizer.analyze_motion(prev_frame, processed_frame)
                        
                        # Classify action
                        current_action = recognizer.classify_action(list(frame_sequence))
                        
                        # Generate description
                        current_description = recognizer.generate_description(
                            current_players, current_action, current_motion, frame_count
                        )
                        
                        print(current_description)
                        
                    except Exception as e:
                        print(f"Processing error at frame {frame_count}: {e}")
                
                prev_frame = processed_frame.copy()
            
            # Create display frame (resize original frame for display)
            display_frame = cv2.resize(frame if not paused else display_frame, (1200, 675))
            
            # Draw player bounding boxes
            for (x, y, w, h) in current_players:
                # Scale coordinates to display frame size
                scale_x = 1200 / 640
                scale_y = 675 / 360
                x_display = int(x * scale_x)
                y_display = int(y * scale_y)
                w_display = int(w * scale_x)
                h_display = int(h * scale_y)
                
                cv2.rectangle(display_frame, (x_display, y_display), 
                            (x_display + w_display, y_display + h_display), (0, 255, 0), 2)
                cv2.putText(display_frame, 'Player', (x_display, y_display - 10),
                           cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
            
            # Add overlay with analysis information
            overlay = display_frame.copy()
            cv2.rectangle(overlay, (10, 10), (600, 150), (0, 0, 0), -1)
            cv2.addWeighted(overlay, 0.7, display_frame, 0.3, 0, display_frame)
            
            # Add text information
            timestamp = frame_count / fps
            info_lines = [
                f"Time: {timestamp:.1f}s / {duration:.1f}s",
                f"Frame: {frame_count}/{total_frames}",
                f"Players detected: {len(current_players)}",
                f"Action: {current_action.replace('_', ' ').title()}",
                f"Motion: {current_motion.replace('_', ' ').title()}",
                "Press SPACE to pause, Q to quit"
            ]
            
            y_offset = 25
            for line in info_lines:
                cv2.putText(display_frame, line, (20, y_offset),
                           cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)
                y_offset += 20
            
            # Show the frame
            cv2.imshow('Football Action Recognition', display_frame)
            
            # Handle key presses
            key = cv2.waitKey(1) & 0xFF
            if key == ord('q'):
                break
            elif key == ord(' '):  # Space bar to pause/resume
                paused = not paused
                if paused:
                    print("Video paused. Press SPACE to resume.")
                else:
                    print("Video resumed.")
            
            # Control playback speed to match original FPS
            if not paused:
                time.sleep(1.0 / fps)
                
    except Exception as e:
        print(f"Error during video processing: {e}")
    finally:
        cap.release()
        cv2.destroyAllWindows()
        print(f"\nProcessed {frame_count} frames")
        print("Video analysis complete!")

# Usage
if __name__ == "__main__":
    video_path = r"C:\Users\Hello\Downloads\2932301-uhd_4096_2160_24fps.mp4"
    
    print("Football Video Action Recognition System")
    print("=" * 50)
    
    try:
        analyze_football_video(video_path)
    except Exception as e:
        print(f"Error analyzing video: {str(e)}")
        print("\nTroubleshooting tips:")
        print("1. Make sure the video file exists at the specified path")
        print("2. Install required packages: pip install opencv-python torch torchvision")
        print("3. Ensure the video format is supported by OpenCV")

Football Video Action Recognition System
Analyzing football video:
Duration: 15.3 seconds
FPS: 24.0
Total frames: 367
Press 'q' to quit, 'space' to pause/resume
--------------------------------------------------
At 0.4s: 2 player(s) detected Players standing or waiting for play to develop with slow, controlled movement
At 0.8s: 1 player(s) detected Players standing or waiting for play to develop with moderate pace movement
At 1.2s: 3 player(s) detected Players moving into position or walking with moderate pace movement
At 1.6s: 3 player(s) detected Players moving into position or walking with moderate pace movement
At 2.0s: 2 player(s) detected Players moving into position or walking with rapid movement detected
At 2.4s: 2 player(s) detected Players moving into position or walking with rapid movement detected
At 2.8s: 2 player(s) detected Players moving into position or walking with rapid movement detected
At 3.2s: 2 player(s) detected Players running across the field, possibly dribbli