Line | Count | Source |
1 | use anyhow::Result; | |
2 | use code_guardian_core::{CustomDetectorManager, DistributedCoordinator, WorkerConfig}; | |
3 | use std::path::PathBuf; | |
4 | ||
5 | use crate::cli_definitions::{CustomDetectorAction, DistributedAction, IncrementalAction}; | |
6 | ||
7 | 5 | pub fn handle_custom_detectors(action: CustomDetectorAction) -> Result<()> { |
8 | 5 | match action { |
9 | CustomDetectorAction::List => { | |
10 | 0 | let manager = CustomDetectorManager::new(); |
11 | 0 | let detectors = manager.list_detectors(); |
12 | ||
13 | 0 | if detectors.is_empty() { |
14 | 0 | println!("No custom detectors found. Use 'create-examples' to generate some."); |
15 | 0 | return Ok(()); |
16 | 0 | } |
17 | ||
18 | 0 | println!("๐ Custom Detectors:"); |
19 | 0 | for detector in detectors { |
20 | 0 | println!(" ๐ {} ({})", detector.name, detector.description); |
21 | 0 | println!(" Pattern: {}", detector.pattern); |
22 | 0 | println!(" Severity: {:?}", detector.severity); |
23 | 0 | println!(" Enabled: {}", detector.enabled); |
24 | 0 | if !detector.file_extensions.is_empty() { |
25 | 0 | println!(" Extensions: {}", detector.file_extensions.join(", ")); |
26 | 0 | } |
27 | 0 | println!(); |
28 | } | |
29 | } | |
30 | ||
31 | 3 | CustomDetectorAction::CreateExamples { output } => { |
32 | 3 | let mut manager = CustomDetectorManager::new(); |
33 | 3 | manager.create_examples() |
34 | 3 | manager.save_to_file(&output) |
35 | 3 | println!( |
36 | 3 | "โ
Created example custom detectors in {}", |
37 | 3 | output.display() |
38 | ); | |
39 | } | |
40 | ||
41 | 1 | CustomDetectorAction::Load { file } => { |
42 | 1 | let mut manager = CustomDetectorManager::new(); |
43 | 1 | manager.load_from_file(&file) |
44 | ||
45 | 1 | let detectors = manager.list_detectors(); |
46 | 1 | println!( |
47 | 1 | "โ
Loaded {} custom detectors from {}", |
48 | 1 | detectors.len(), |
49 | 1 | file.display() |
50 | ); | |
51 | ||
52 | 4 | for |
53 | 3 | println!( |
54 | 3 | " - {} ({})", |
55 | detector.name, | |
56 | 3 | if detector.enabled { |
57 | 3 | "enabled" |
58 | } else { | |
59 | 0 | "disabled" |
60 | } | |
61 | ); | |
62 | } | |
63 | } | |
64 | ||
65 | CustomDetectorAction::Test { | |
66 | 1 | detectors, |
67 | 1 | test_file, |
68 | } => { | |
69 | 1 | let mut manager = CustomDetectorManager::new(); |
70 | 1 | manager.load_from_file(&detectors) |
71 | ||
72 | 1 | let content = std::fs::read_to_string(&test_file) |
73 | 1 | let detector_instances = manager.get_detectors(); |
74 | ||
75 | 1 | println!("๐งช Testing custom detectors on {}", test_file.display()); |
76 | ||
77 | 1 | let mut total_matches = 0; |
78 | 4 | for |
79 | 3 | let matches = detector.detect(&content, &test_file); |
80 | 3 | if !matches.is_empty() { |
81 | 0 | println!(" Found {} matches:", matches.len()); |
82 | 0 | for mat in &matches { |
83 | 0 | println!(" {}:{} - {}", mat.line_number, mat.column, mat.message); |
84 | 0 | } |
85 | 0 | total_matches += matches.len(); |
86 | 3 | } |
87 | } | |
88 | ||
89 | 1 | if total_matches == 0 { |
90 | 1 | println!(" โ
No matches found"); |
91 | 1 | } else { |
92 | 0 | println!(" ๐ Total matches: {}", total_matches); |
93 | 0 | } |
94 | } | |
95 | } | |
96 | ||
97 | 5 | Ok(()) |
98 | 5 | } |
99 | ||
/// Handle the `incremental` CLI subcommand (status / reset / stats), backed by
/// the JSON state file `code-guardian.incremental` in the current directory.
///
/// # Errors
/// `Status` propagates metadata-read failures; `Stats` propagates read/parse
/// failures; `Reset` propagates file-removal failures.
pub fn handle_incremental(action: IncrementalAction) -> Result<()> {
    // State lives next to wherever the tool is invoked, not next to the scanned path.
    let state_file = PathBuf::from("code-guardian.incremental");

    match action {
        IncrementalAction::Status => {
            if !state_file.exists() {
                println!("โ No incremental scan state found.");
                println!(" Run a scan with --incremental to create state.");
                return Ok(());
            }

            // Load state and show status
            println!("๐ Incremental Scan Status:");
            println!(" State file: {}", state_file.display());
            println!(
                " State file size: {} bytes",
                std::fs::metadata(&state_file)?.len()
            );

            // Try to load and show basic stats.
            // Read/parse failures are deliberately swallowed here: Status is
            // best-effort and still printed the file-level info above.
            if let Ok(content) = std::fs::read_to_string(&state_file) {
                if let Ok(state) =
                    serde_json::from_str::<code_guardian_core::IncrementalState>(&content)
                {
                    println!(" Tracked files: {}", state.file_metadata.len());
                    println!(" Scan history: {} entries", state.scan_history.len());

                    if let Some(last_scan) = state.scan_history.last() {
                        println!(" Last scan:");
                        println!(" Files scanned: {}", last_scan.files_scanned);
                        println!(" Files skipped: {}", last_scan.files_skipped);
                        println!(" Duration: {}ms", last_scan.scan_duration_ms);
                    }
                }
            }
        }

        IncrementalAction::Reset => {
            if state_file.exists() {
                std::fs::remove_file(&state_file)?;
                println!("โ Incremental scan state reset.");
                println!(" Next scan will be a full scan.");
            } else {
                println!("โ No incremental state to reset.");
            }
        }

        IncrementalAction::Stats => {
            if !state_file.exists() {
                println!("โ No incremental scan state found.");
                return Ok(());
            }

            // Unlike Status, Stats propagates read/parse errors to the caller.
            let content = std::fs::read_to_string(&state_file)?;
            let state: code_guardian_core::IncrementalState = serde_json::from_str(&content)?;

            println!("๐ Incremental Scan Statistics:");
            println!(" Total tracked files: {}", state.file_metadata.len());
            println!(" Scan history entries: {}", state.scan_history.len());

            if !state.scan_history.is_empty() {
                // At most the five most recent scans, newest first.
                let recent_scans = state.scan_history.iter().rev().take(5);
                println!(" Recent scans:");

                for (i, scan) in recent_scans.enumerate() {
                    // Fall back to "Unknown" on out-of-range timestamps
                    // instead of panicking.
                    let timestamp = chrono::DateTime::from_timestamp(scan.timestamp as i64, 0)
                        .map(|dt| dt.format("%Y-%m-%d %H:%M:%S").to_string())
                        .unwrap_or_else(|| "Unknown".to_string());

                    println!(
                        " {}. {} - {} files scanned, {} skipped ({}ms)",
                        i + 1,
                        timestamp,
                        scan.files_scanned,
                        scan.files_skipped,
                        scan.scan_duration_ms
                    );
                }

                // Calculate average speedup
                let total_scanned: usize = state.scan_history.iter().map(|s| s.files_scanned).sum();
                let total_skipped: usize = state.scan_history.iter().map(|s| s.files_skipped).sum();
                let total_files = total_scanned + total_skipped;

                if total_files > 0 {
                    // max(1) guards the division when every file was skipped.
                    let average_speedup = total_files as f64 / total_scanned.max(1) as f64;
                    println!(" Average speedup: {:.2}x", average_speedup);
                    println!(
                        " Cache hit rate: {:.1}%",
                        (total_skipped as f64 / total_files as f64) * 100.0
                    );
                }
            }
        }
    }

    Ok(())
}
198 | ||
199 | 2 | pub async fn handle_distributed(action: DistributedAction) -> Result<()> { |
200 | 2 | match action { |
201 | 1 | DistributedAction::Setup { workers } => { |
202 | 1 | println!( |
203 | 1 | "๐ Setting up distributed scanning with {} workers", |
204 | workers | |
205 | ); | |
206 | ||
207 | 1 | let mut coordinator = DistributedCoordinator::new(); |
208 | ||
209 | 2 | for i in 0.. |
210 | 2 | let worker_config = WorkerConfig { |
211 | 2 | worker_id: format!("worker_{}", i), |
212 | 2 | max_concurrent_units: 4, |
213 | 2 | supported_detectors: vec![ |
214 | 2 | "TODO".to_string(), |
215 | 2 | "FIXME".to_string(), |
216 | 2 | "HACK".to_string(), |
217 | 2 | "BUG".to_string(), |
218 | 2 | ], |
219 | 2 | cpu_cores: 4, |
220 | 2 | memory_limit_mb: 2048, |
221 | 2 | endpoint: Some(format!("worker-{}.local:8080", i)), |
222 | 2 | }; |
223 | 2 | |
224 | 2 | coordinator.register_worker(worker_config); |
225 | 2 | } |
226 | ||
227 | 1 | println!("โ
Distributed setup complete!"); |
228 | 1 | println!(" Workers: {}", workers); |
229 | 1 | println!( |
230 | 1 | " Total capacity: {} cores, {}MB memory", |
231 | 1 | workers * 4, |
232 | 1 | workers * 2048 |
233 | ); | |
234 | ||
235 | 1 | println!("\n๐ก To run a distributed scan:"); |
236 | 1 | println!( |
237 | 1 | " code-guardian distributed scan <path> --workers {}", |
238 | workers | |
239 | ); | |
240 | } | |
241 | ||
242 | DistributedAction::Scan { | |
243 | 1 | path, |
244 | 1 | workers, |
245 | 1 | batch_size, |
246 | } => { | |
247 | 1 | println!("๐ Running distributed scan on {}", path.display()); |
248 | 1 | println!(" Workers: {}, Batch size: {}", workers, batch_size); |
249 | ||
250 | 1 | let mut coordinator = DistributedCoordinator::new(); |
251 | ||
252 | // Register workers | |
253 | 2 | for i in 0.. |
254 | 2 | let worker_config = WorkerConfig { |
255 | 2 | worker_id: format!("worker_{}", i), |
256 | 2 | max_concurrent_units: 2, |
257 | 2 | supported_detectors: vec!["TODO".to_string(), "FIXME".to_string()], |
258 | 2 | cpu_cores: 2, |
259 | 2 | memory_limit_mb: 1024, |
260 | 2 | endpoint: None, |
261 | 2 | }; |
262 | 2 | coordinator.register_worker(worker_config); |
263 | 2 | } |
264 | ||
265 | // Register basic detectors | |
266 | 1 | coordinator.register_detector( |
267 | 1 | "TODO".to_string(), |
268 | 1 | Box::new(code_guardian_core::TodoDetector), |
269 | ); | |
270 | 1 | coordinator.register_detector( |
271 | 1 | "FIXME".to_string(), |
272 | 1 | Box::new(code_guardian_core::FixmeDetector), |
273 | ); | |
274 | ||
275 | // Collect files | |
276 | 1 | let files: Vec<PathBuf> = ignore::WalkBuilder::new(&path) |
277 | 1 | .build() |
278 | 2 | . |
279 | 2 | entry.ok().and_then(|e| { |
280 | 2 | if e.file_type().is_some_and(|ft| ft.is_file()) { |
281 | 1 | Some(e.path().to_path_buf()) |
282 | } else { | |
283 | 1 | None |
284 | } | |
285 | 2 | }) |
286 | 2 | }) |
287 | 1 | .collect(); |
288 | ||
289 | 1 | coordinator.create_work_units(files, batch_size) |
290 | 1 | let matches = coordinator.execute_distributed_scan().await |
291 | ||
292 | 1 | let stats = coordinator.get_statistics(); |
293 | ||
294 | 1 | println!("โ
Distributed scan complete!"); |
295 | 1 | println!(" Total matches: {}", matches.len()); |
296 | 1 | println!(" Files processed: {}", stats.total_files_processed); |
297 | 1 | println!(" Work units: {}", stats.total_work_units); |
298 | 1 | println!(" Processing time: {}ms", stats.total_processing_time_ms); |
299 | ||
300 | // Show top matches | |
301 | 1 | if !matches.is_empty() { |
302 | 1 | println!("\n๐ Sample matches:"); |
303 | 1 | for (i, mat) in matches.iter().take(5).enumerate() { |
304 | 1 | println!( |
305 | 1 | " {}. {}:{} - {}", |
306 | 1 | i + 1, |
307 | 1 | mat.line_number, |
308 | 1 | mat.column, |
309 | 1 | mat.message |
310 | 1 | ); |
311 | 1 | } |
312 | ||
313 | 1 | if matches.len() > 5 { |
314 | 0 | println!(" ... and {} more", matches.len() - 5); |
315 | 1 | } |
316 | 0 | } |
317 | } | |
318 | } | |
319 | ||
320 | 2 | Ok(()) |
321 | 2 | } |
Line | Count | Source |
1 | use anyhow::Result; | |
2 | use code_guardian_core::{ | |
3 | DetectorFactory, DetectorProfile, OptimizedScanner, Scanner, StreamingScanner, | |
4 | }; | |
5 | use std::path::Path; | |
6 | use std::time::Instant; | |
7 | ||
8 | /// Run performance benchmarks on different scanner types | |
9 | 1 | pub fn run_benchmark(path: &Path) -> Result<()> { |
10 | 1 | println!("๐ Code-Guardian Performance Benchmark"); |
11 | 1 | println!("=====================================\n"); |
12 | ||
13 | 1 | println!("๐ Scanning path: {}", path.display()); |
14 | 1 | println!("๐ Testing different scanner configurations...\n"); |
15 | ||
16 | // Test basic scanner | |
17 | 1 | println!("1๏ธโฃ Basic Scanner (TODO + FIXME only)"); |
18 | 1 | let start = Instant::now(); |
19 | 1 | let basic_scanner = Scanner::new(DetectorFactory::create_default_detectors()); |
20 | 1 | let basic_matches = basic_scanner.scan(path) |
21 | 1 | let basic_duration = start.elapsed(); |
22 | 1 | println!(" โฑ๏ธ Duration: {:?}", basic_duration); |
23 | 1 | println!(" ๐ Matches found: {}", basic_matches.len()); |
24 | 1 | println!(); |
25 | ||
26 | // Test comprehensive scanner | |
27 | 1 | println!("2๏ธโฃ Comprehensive Scanner (All detectors)"); |
28 | 1 | let start = Instant::now(); |
29 | 1 | let comprehensive_scanner = Scanner::new(DetectorProfile::Comprehensive.get_detectors()); |
30 | 1 | let comprehensive_matches = comprehensive_scanner.scan(path) |
31 | 1 | let comprehensive_duration = start.elapsed(); |
32 | 1 | println!(" โฑ๏ธ Duration: {:?}", comprehensive_duration); |
33 | 1 | println!(" ๐ Matches found: {}", comprehensive_matches.len()); |
34 | 1 | println!(); |
35 | ||
36 | // Test optimized scanner | |
37 | 1 | println!("3๏ธโฃ Optimized Scanner (With caching)"); |
38 | 1 | let start = Instant::now(); |
39 | 1 | let optimized_scanner = OptimizedScanner::new(DetectorProfile::Comprehensive.get_detectors()) |
40 | 1 | .with_cache_size(10000); |
41 | 1 | let (optimized_matches, optimized_metrics) = optimized_scanner.scan_optimized(path) |
42 | 1 | let optimized_duration = start.elapsed(); |
43 | 1 | println!(" โฑ๏ธ Duration: {:?}", optimized_duration); |
44 | 1 | println!(" ๐ Matches found: {}", optimized_matches.len()); |
45 | 1 | println!( |
46 | 1 | " ๐ Files scanned: {}", |
47 | optimized_metrics.total_files_scanned | |
48 | ); | |
49 | 1 | println!( |
50 | 1 | " ๐ Lines processed: {}", |
51 | optimized_metrics.total_lines_processed | |
52 | ); | |
53 | 1 | println!(" ๐ฏ Cache hits: {}", optimized_metrics.cache_hits); |
54 | 1 | println!(" ๐ฏ Cache misses: {}", optimized_metrics.cache_misses); |
55 | 1 | println!(); |
56 | ||
57 | // Test streaming scanner | |
58 | 1 | println!("4๏ธโฃ Streaming Scanner (Memory efficient)"); |
59 | 1 | let start = Instant::now(); |
60 | 1 | let streaming_scanner = StreamingScanner::new(DetectorProfile::Comprehensive.get_detectors()); |
61 | 1 | let mut streaming_matches = Vec::new(); |
62 | 1 | let |
63 | 0 | streaming_matches.extend(batch); |
64 | 0 | Ok(()) |
65 | 1 |
|
66 | 0 | let streaming_duration = start.elapsed(); |
67 | 0 | println!(" โฑ๏ธ Duration: {:?}", streaming_duration); |
68 | 0 | println!(" ๐ Matches found: {}", streaming_matches.len()); |
69 | 0 | println!( |
70 | 0 | " ๐ Files scanned: {}", |
71 | streaming_metrics.total_files_scanned | |
72 | ); | |
73 | 0 | println!( |
74 | 0 | " ๐ Lines processed: {}", |
75 | streaming_metrics.total_lines_processed | |
76 | ); | |
77 | 0 | println!(); |
78 | ||
79 | // Performance comparison | |
80 | 0 | println!("๐ Performance Comparison"); |
81 | 0 | println!("========================"); |
82 | ||
83 | 0 | let basic_files_per_sec = |
84 | 0 | optimized_metrics.total_files_scanned as f64 / basic_duration.as_secs_f64(); |
85 | 0 | let comprehensive_files_per_sec = |
86 | 0 | optimized_metrics.total_files_scanned as f64 / comprehensive_duration.as_secs_f64(); |
87 | 0 | let optimized_files_per_sec = |
88 | 0 | optimized_metrics.total_files_scanned as f64 / optimized_duration.as_secs_f64(); |
89 | 0 | let streaming_files_per_sec = |
90 | 0 | streaming_metrics.total_files_scanned as f64 / streaming_duration.as_secs_f64(); |
91 | ||
92 | 0 | println!("๐ Files per second:"); |
93 | 0 | println!(" Basic: {:.1}", basic_files_per_sec); |
94 | 0 | println!(" Comprehensive: {:.1}", comprehensive_files_per_sec); |
95 | 0 | println!(" Optimized: {:.1}", optimized_files_per_sec); |
96 | 0 | println!(" Streaming: {:.1}", streaming_files_per_sec); |
97 | 0 | println!(); |
98 | ||
99 | 0 | println!("๐ฏ Speed improvements:"); |
100 | 0 | let optimized_speedup = optimized_files_per_sec / comprehensive_files_per_sec; |
101 | 0 | let streaming_speedup = streaming_files_per_sec / comprehensive_files_per_sec; |
102 | 0 | println!(" Optimized vs Comprehensive: {:.2}x", optimized_speedup); |
103 | 0 | println!(" Streaming vs Comprehensive: {:.2}x", streaming_speedup); |
104 | 0 | println!(); |
105 | ||
106 | 0 | println!("๐ก Recommendations:"); |
107 | 0 | if optimized_speedup > 1.2 { |
108 | 0 | println!(" โ
Use --optimize flag for better performance"); |
109 | 0 | } |
110 | 0 | if streaming_speedup > 1.1 { |
111 | 0 | println!(" โ
Use --streaming flag for large codebases"); |
112 | 0 | } |
113 | 0 | if optimized_metrics.cache_hits > 0 { |
114 | 0 | println!(" โ
Caching is effective for repeated scans"); |
115 | 0 | } |
116 | ||
117 | 0 | println!(); |
118 | 0 | println!("๐ Benchmark completed!"); |
119 | ||
120 | 0 | Ok(()) |
121 | 1 | } |
122 | ||
123 | /// Quick performance test | |
124 | 1 | pub fn quick_performance_test(path: &Path) -> Result<()> { |
125 | 1 | println!("โก Quick Performance Test"); |
126 | 1 | println!("========================\n"); |
127 | ||
128 | 1 | let start = Instant::now(); |
129 | 1 | let scanner = OptimizedScanner::new(DetectorProfile::Basic.get_detectors()); |
130 | 1 | let (matches, metrics) = scanner.scan_optimized(path) |
131 | 1 | let duration = start.elapsed(); |
132 | ||
133 | 1 | println!("๐ Results:"); |
134 | 1 | println!(" Duration: {:?}", duration); |
135 | 1 | println!(" Files scanned: {}", metrics.total_files_scanned); |
136 | 1 | println!(" Lines processed: {}", metrics.total_lines_processed); |
137 | 1 | println!(" Matches found: {}", matches.len()); |
138 | 1 | println!( |
139 | 1 | " Files/sec: {:.1}", |
140 | 1 | metrics.total_files_scanned as f64 / duration.as_secs_f64() |
141 | ); | |
142 | 1 | println!( |
143 | 1 | " Lines/sec: {:.1}", |
144 | 1 | metrics.total_lines_processed as f64 / duration.as_secs_f64() |
145 | ); | |
146 | ||
147 | 1 | if metrics.cache_hits > 0 { |
148 | 0 | let hit_rate = |
149 | 0 | metrics.cache_hits as f64 / (metrics.cache_hits + metrics.cache_misses) as f64; |
150 | 0 | println!(" Cache hit rate: {:.1}%", hit_rate * 100.0); |
151 | 1 | } |
152 | ||
153 | 1 | Ok(()) |
154 | 1 | } |
Line | Count | Source |
1 | use anyhow::Result; | |
2 | use clap::CommandFactory; | |
3 | use clap_complete::generate; | |
4 | use clap_complete::Shell; | |
5 | use code_guardian_storage::ScanRepository; | |
6 | use std::io; | |
7 | use std::path::PathBuf; | |
8 | ||
9 | use crate::benchmark; | |
10 | use crate::cli_definitions::{Cli, GitAction}; | |
11 | use crate::git_integration::GitIntegration; | |
12 | use crate::utils; | |
13 | ||
14 | /// Handle history command - show all scan history from database | |
15 | 2 | pub fn handle_history(db: Option<PathBuf>) -> Result<()> { |
16 | 2 | let db_path = utils::get_db_path(db); |
17 | 2 | let |
18 | 1 | let scans = repo.get_all_scans() |
19 | ||
20 | 1 | if scans.is_empty() { |
21 | 0 | println!("No scans found."); |
22 | 0 | return Ok(()); |
23 | 1 | } |
24 | ||
25 | 1 | println!("Scan History:"); |
26 | 2 | for |
27 | 1 | println!( |
28 | 1 | "ID: {}, Timestamp: {}, Path: {}", |
29 | 1 | scan.id.unwrap(), |
30 | 1 | chrono::DateTime::from_timestamp(scan.timestamp, 0) |
31 | 1 | .unwrap() |
32 | 1 | .format("%Y-%m-%d %H:%M:%S"), |
33 | 1 | scan.root_path |
34 | 1 | ); |
35 | 1 | } |
36 | 1 | Ok(()) |
37 | 2 | } |
38 | ||
39 | /// Handle shell completion generation | |
40 | 1 | pub fn handle_completion(shell: Shell) -> Result<()> { |
41 | 1 | let mut cmd = Cli::command(); |
42 | 1 | let bin_name = cmd.get_name().to_string(); |
43 | 1 | generate(shell, &mut cmd, bin_name, &mut io::stdout()); |
44 | 1 | Ok(()) |
45 | 1 | } |
46 | ||
47 | /// Handle benchmark command | |
48 | 2 | pub fn handle_benchmark(path: Option<PathBuf>, quick: bool) -> Result<()> { |
49 | 2 | let benchmark_path = path.unwrap_or_else(|| |
50 | ||
51 | 2 | if quick { |
52 | 1 | benchmark::quick_performance_test(&benchmark_path) |
53 | } else { | |
54 | 1 | benchmark::run_benchmark(&benchmark_path) |
55 | } | |
56 | 2 | } |
57 | ||
58 | // These functions are re-exported from advanced_handlers | |
59 | pub use crate::advanced_handlers::{ | |
60 | handle_custom_detectors, handle_distributed, handle_incremental, | |
61 | }; | |
62 | ||
63 | /// Handle git integration commands | |
64 | 0 | pub fn handle_git(action: GitAction) -> Result<()> { |
65 | 0 | match action { |
66 | 0 | GitAction::InstallHook { path } => { |
67 | 0 | println!("๐ง Installing Code-Guardian pre-commit hook..."); |
68 | ||
69 | 0 | if !GitIntegration::is_git_repo(&path) { |
70 | 0 | eprintln!("โ Error: {} is not a git repository", path.display()); |
71 | 0 | std::process::exit(1); |
72 | 0 | } |
73 | ||
74 | 0 | let repo_root = GitIntegration::get_repo_root(&path)?; |
75 | 0 | GitIntegration::install_pre_commit_hook(&repo_root)?; |
76 | ||
77 | 0 | println!("๐ก Usage: The hook will automatically run on 'git commit'"); |
78 | 0 | println!("๐ก Manual run: code-guardian pre-commit --staged-only --fast"); |
79 | 0 | Ok(()) |
80 | } | |
81 | 0 | GitAction::UninstallHook { path } => { |
82 | 0 | println!("๐๏ธ Uninstalling Code-Guardian pre-commit hook..."); |
83 | ||
84 | 0 | if !GitIntegration::is_git_repo(&path) { |
85 | 0 | eprintln!("โ Error: {} is not a git repository", path.display()); |
86 | 0 | std::process::exit(1); |
87 | 0 | } |
88 | ||
89 | 0 | let repo_root = GitIntegration::get_repo_root(&path)?; |
90 | 0 | GitIntegration::uninstall_pre_commit_hook(&repo_root)?; |
91 | 0 | Ok(()) |
92 | } | |
93 | 0 | GitAction::Staged { path } => { |
94 | 0 | println!("๐ Listing staged files..."); |
95 | ||
96 | 0 | if !GitIntegration::is_git_repo(&path) { |
97 | 0 | eprintln!("โ Error: {} is not a git repository", path.display()); |
98 | 0 | std::process::exit(1); |
99 | 0 | } |
100 | ||
101 | 0 | let repo_root = GitIntegration::get_repo_root(&path)?; |
102 | 0 | let staged_files = GitIntegration::get_staged_files(&repo_root)?; |
103 | ||
104 | 0 | if staged_files.is_empty() { |
105 | 0 | println!("โน๏ธ No staged files found."); |
106 | 0 | } else { |
107 | 0 | println!("๐ Found {} staged file(s):", staged_files.len()); |
108 | 0 | for (i, file) in staged_files.iter().enumerate() { |
109 | 0 | println!(" {}. {}", i + 1, file.display()); |
110 | 0 | } |
111 | } | |
112 | 0 | Ok(()) |
113 | } | |
114 | } | |
115 | 0 | } |
Line | Count | Source |
1 | use anyhow::Result; | |
2 | use code_guardian_core::Match; | |
3 | use code_guardian_storage::{Scan, ScanRepository, SqliteScanRepository}; | |
4 | use std::path::PathBuf; | |
5 | ||
6 | use crate::report_handlers::get_formatter; | |
7 | use crate::utils::get_db_path; | |
8 | ||
9 | 2 | pub fn handle_compare(id1: i64, id2: i64, format: String, db: Option<PathBuf>) -> Result<()> { |
10 | 2 | let formatter = get_formatter(&format) |
11 | 2 | let db_path = get_db_path(db); |
12 | 2 | let repo = SqliteScanRepository::new(&db_path) |
13 | 2 | let scan1 = repo.get_scan(id1) |
14 | 2 | let scan2 = repo.get_scan(id2) |
15 | 2 | match (scan1, scan2) { |
16 | 2 | (Some(s1), Some(s2)) => { |
17 | 2 | let diff = compare_scans(&s1, &s2); |
18 | 2 | println!("{}", formatter.format(&diff)); |
19 | 2 | } |
20 | 0 | _ => println!("One or both scans not found."), |
21 | } | |
22 | 2 | Ok(()) |
23 | 2 | } |
24 | ||
25 | 2 | pub fn compare_scans(scan1: &Scan, scan2: &Scan) -> Vec<Match> { |
26 | // Simple diff: matches in scan2 not in scan1 | |
27 | // For simplicity, assume matches are unique by file_path, line_number, pattern | |
28 | 2 | let set1: std::collections::HashSet<_> = scan1 |
29 | 2 | .matches |
30 | 2 | .iter() |
31 | 2 | .map(|m| (m.file_path.clone(), m.line_number, m.pattern.clone())) |
32 | 2 | .collect(); |
33 | 2 | scan2 |
34 | 2 | .matches |
35 | 2 | .iter() |
36 | 4 | . |
37 | 2 | .cloned() |
38 | 2 | .collect() |
39 | 2 | } |
Line | Count | Source |
1 | use anyhow::{anyhow, Result}; | |
2 | use std::path::{Path, PathBuf}; | |
3 | use std::process::Command; | |
4 | ||
/// Git integration utilities for Code-Guardian.
///
/// A stateless namespace around the `git` CLI: all methods shell out via
/// `std::process::Command`, so a working `git` binary must be on PATH.
pub struct GitIntegration;

impl GitIntegration {
    /// Get list of staged files (files in git index).
    ///
    /// Uses `git diff --cached --name-only --diff-filter=ACMR`, i.e. only
    /// Added/Copied/Modified/Renamed entries; deletions are excluded, and
    /// paths that no longer exist on disk are filtered out.
    ///
    /// # Errors
    /// Fails if git cannot be spawned or exits unsuccessfully (stderr is
    /// included in the error).
    pub fn get_staged_files(repo_path: &Path) -> Result<Vec<PathBuf>> {
        let output = Command::new("git")
            .args(["diff", "--cached", "--name-only", "--diff-filter=ACMR"])
            .current_dir(repo_path)
            .output()?;

        if !output.status.success() {
            let stderr = String::from_utf8_lossy(&output.stderr);
            return Err(anyhow!("Git command failed: {}", stderr));
        }

        let stdout = String::from_utf8_lossy(&output.stdout);
        let files: Vec<PathBuf> = stdout
            .lines()
            .filter(|line| !line.trim().is_empty())
            // git prints paths relative to the repo root; make them absolute.
            .map(|line| repo_path.join(line.trim()))
            .filter(|path| path.exists()) // Only include files that still exist
            .collect();

        Ok(files)
    }

    /// Get the root directory of the git repository.
    ///
    /// # Errors
    /// Fails when `start_path` is not inside a git repository or git cannot run.
    pub fn get_repo_root(start_path: &Path) -> Result<PathBuf> {
        let output = Command::new("git")
            .args(["rev-parse", "--show-toplevel"])
            .current_dir(start_path)
            .output()?;

        if !output.status.success() {
            return Err(anyhow!("Not in a git repository or git command failed"));
        }

        let stdout = String::from_utf8_lossy(&output.stdout);
        // Trim the trailing newline git appends.
        let repo_root = stdout.trim();
        Ok(PathBuf::from(repo_root))
    }

    /// Check if the current directory is in a git repository.
    ///
    /// Returns `false` (rather than erroring) when git is missing or fails.
    pub fn is_git_repo(path: &Path) -> bool {
        Command::new("git")
            .args(["rev-parse", "--git-dir"])
            .current_dir(path)
            .output()
            .map(|output| output.status.success())
            .unwrap_or(false)
    }

    /// Get modified lines for staged files (useful for line-specific scanning).
    ///
    /// Runs `git diff --cached --unified=0` and parses the hunk headers; see
    /// [`parse_git_diff`] for the parsing details.
    ///
    /// # Errors
    /// Fails if git cannot be spawned or the diff command exits unsuccessfully.
    #[allow(dead_code)]
    pub fn get_staged_lines(repo_path: &Path) -> Result<Vec<StagedChange>> {
        let output = Command::new("git")
            .args(["diff", "--cached", "--unified=0"])
            .current_dir(repo_path)
            .output()?;

        if !output.status.success() {
            let stderr = String::from_utf8_lossy(&output.stderr);
            return Err(anyhow!("Git diff command failed: {}", stderr));
        }

        let stdout = String::from_utf8_lossy(&output.stdout);
        Ok(parse_git_diff(&stdout, repo_path))
    }

    /// Install pre-commit hook for Code-Guardian.
    ///
    /// Writes a POSIX-shell hook into `.git/hooks/pre-commit` that delegates
    /// to `code-guardian pre-commit --staged-only --fast`, and marks it
    /// executable on Unix. Overwrites any existing pre-commit hook.
    ///
    /// # Errors
    /// Fails on any filesystem error (create dir, write, chmod).
    pub fn install_pre_commit_hook(repo_path: &Path) -> Result<()> {
        let hooks_dir = repo_path.join(".git").join("hooks");
        let hook_path = hooks_dir.join("pre-commit");

        // Create hooks directory if it doesn't exist
        std::fs::create_dir_all(&hooks_dir)?;

        // Pre-commit hook script.
        // NOTE(review): interior indentation of this script may have been
        // lost in the coverage rendering — confirm against the original.
        let hook_script = r#"#!/bin/sh
# Code-Guardian pre-commit hook
# This hook runs Code-Guardian on staged files before commit

# Check if code-guardian is available
if ! command -v code-guardian >/dev/null 2>&1; then
echo "Error: code-guardian not found in PATH"
echo "Please install code-guardian or add it to your PATH"
exit 1
fi

# Run Code-Guardian pre-commit check
exec code-guardian pre-commit --staged-only --fast
"#;

        std::fs::write(&hook_path, hook_script)?;

        // Make the hook executable (Unix-like systems)
        #[cfg(unix)]
        {
            use std::os::unix::fs::PermissionsExt;
            let mut perms = std::fs::metadata(&hook_path)?.permissions();
            perms.set_mode(0o755);
            std::fs::set_permissions(&hook_path, perms)?;
        }

        println!("โ Pre-commit hook installed at: {}", hook_path.display());
        println!("๐ง The hook will run 'code-guardian pre-commit --staged-only --fast' before each commit");

        Ok(())
    }

    /// Uninstall pre-commit hook.
    ///
    /// Only removes the hook when its content identifies it as
    /// Code-Guardian's; a foreign hook is left in place with a warning.
    ///
    /// # Errors
    /// Fails on filesystem errors while reading or removing the hook.
    pub fn uninstall_pre_commit_hook(repo_path: &Path) -> Result<()> {
        let hook_path = repo_path.join(".git").join("hooks").join("pre-commit");

        if hook_path.exists() {
            // Check if it's our hook before removing
            let content = std::fs::read_to_string(&hook_path)?;
            if content.contains("Code-Guardian pre-commit hook") {
                std::fs::remove_file(&hook_path)?;
                println!("โ Code-Guardian pre-commit hook removed");
            } else {
                println!(
                    "โ ๏ธ Pre-commit hook exists but doesn't appear to be Code-Guardian's hook"
                );
                println!(" Manual removal required: {}", hook_path.display());
            }
        } else {
            println!("โน๏ธ No pre-commit hook found");
        }

        Ok(())
    }
}
139 | ||
/// Represents a staged change in git.
#[allow(dead_code)]
#[derive(Debug, Clone)]
pub struct StagedChange {
    // Absolute path: repo root joined with the diff's relative path.
    pub file_path: PathBuf,
    // Line ranges added in the staged (new) version of the file.
    pub added_lines: Vec<LineRange>,
    // Line ranges removed from the pre-image (old) version of the file.
    pub removed_lines: Vec<LineRange>,
}
148 | ||
/// Represents a range of lines.
#[allow(dead_code)]
#[derive(Debug, Clone)]
pub struct LineRange {
    // First line of the range (1-based, as in git hunk headers).
    pub start: usize,
    // Number of consecutive lines in the range.
    pub count: usize,
}
156 | ||
157 | /// Parse git diff output to extract staged changes | |
158 | #[allow(dead_code)] | |
159 | 0 | fn parse_git_diff(diff_output: &str, repo_path: &Path) -> Vec<StagedChange> { |
160 | 0 | let mut changes = Vec::new(); |
161 | 0 | let mut current_file: Option<PathBuf> = None; |
162 | 0 | let mut added_lines = Vec::new(); |
163 | 0 | let mut removed_lines = Vec::new(); |
164 | ||
165 | 0 | for line in diff_output.lines() { |
166 | 0 | if line.starts_with("diff --git") { |
167 | // Save previous file's changes | |
168 | 0 | if let Some(file_path) = current_file.take() { |
169 | 0 | changes.push(StagedChange { |
170 | 0 | file_path, |
171 | 0 | added_lines: std::mem::take(&mut added_lines), |
172 | 0 | removed_lines: std::mem::take(&mut removed_lines), |
173 | 0 | }); |
174 | 0 | } |
175 | 0 | } else if line.starts_with("+++") { |
176 | // Extract new file path | |
177 | 0 | if let Some(path_part) = line.strip_prefix("+++ b/") { |
178 | 0 | current_file = Some(repo_path.join(path_part)); |
179 | 0 | } |
180 | 0 | } else if line.starts_with("@@") { |
181 | // Parse hunk header: @@ -old_start,old_count +new_start,new_count @@ | |
182 | 0 | if let Some(hunk_info) = line.strip_prefix("@@").and_then(|s| s.strip_suffix("@@")) { |
183 | 0 | let parts: Vec<&str> = hunk_info.split_whitespace().collect(); |
184 | 0 | if parts.len() >= 2 { |
185 | // Parse removed lines (-old_start,old_count) | |
186 | 0 | if let Some(removed_part) = parts[0].strip_prefix('-') { |
187 | 0 | if let Some((start_str, count_str)) = removed_part.split_once(',') { |
188 | 0 | if let (Ok(start), Ok(count)) = |
189 | 0 | (start_str.parse::<usize>(), count_str.parse::<usize>()) |
190 | { | |
191 | 0 | if count > 0 { |
192 | 0 | removed_lines.push(LineRange { start, count }); |
193 | 0 | } |
194 | 0 | } |
195 | 0 | } |
196 | 0 | } |
197 | ||
198 | // Parse added lines (+new_start,new_count) | |
199 | 0 | if let Some(added_part) = parts[1].strip_prefix('+') { |
200 | 0 | if let Some((start_str, count_str)) = added_part.split_once(',') { |
201 | 0 | if let (Ok(start), Ok(count)) = |
202 | 0 | (start_str.parse::<usize>(), count_str.parse::<usize>()) |
203 | { | |
204 | 0 | if count > 0 { |
205 | 0 | added_lines.push(LineRange { start, count }); |
206 | 0 | } |
207 | 0 | } |
208 | 0 | } |
209 | 0 | } |
210 | 0 | } |
211 | 0 | } |
212 | 0 | } |
213 | } | |
214 | ||
215 | // Don't forget the last file | |
216 | 0 | if let Some(file_path) = current_file { |
217 | 0 | changes.push(StagedChange { |
218 | 0 | file_path, |
219 | 0 | added_lines, |
220 | 0 | removed_lines, |
221 | 0 | }); |
222 | 0 | } |
223 | ||
224 | 0 | changes |
225 | 0 | } |
226 | ||
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Constructing a `LineRange` and a `StagedChange` preserves the data fed in.
    #[test]
    fn test_git_integration_basic() {
        let range = LineRange { start: 5, count: 3 };
        assert_eq!(range.start, 5);
        assert_eq!(range.count, 3);

        let temp_dir = TempDir::new().unwrap();
        let change = StagedChange {
            file_path: temp_dir.path().join("test.rs"),
            added_lines: vec![range],
            removed_lines: vec![],
        };

        assert_eq!(change.added_lines.len(), 1);
        assert!(change.removed_lines.is_empty());
    }

    /// A fresh temp directory must not be detected as a git repository.
    #[test]
    fn test_is_git_repo() {
        let scratch = TempDir::new().unwrap();
        assert!(!GitIntegration::is_git_repo(scratch.path()));
    }

    /// `LineRange` is plain data: fields read back exactly as written.
    #[test]
    fn test_line_range() {
        let range = LineRange { start: 5, count: 3 };
        assert_eq!(range.start, 5);
        assert_eq!(range.count, 3);
    }
}
Line | Count | Source |
1 | use anyhow::Result; | |
2 | use clap::Parser; | |
3 | ||
4 | // Module declarations | |
5 | mod advanced_handlers; | |
6 | mod benchmark; | |
7 | mod cli_definitions; | |
8 | mod command_handlers; | |
9 | mod comparison_handlers; | |
10 | mod git_integration; | |
11 | mod production_handlers; | |
12 | mod report_handlers; | |
13 | mod scan_handlers; | |
14 | mod stack_presets; | |
15 | mod utils; | |
16 | ||
17 | // Import the CLI definitions and command handlers | |
18 | use cli_definitions::{Cli, Commands}; | |
19 | use command_handlers::*; | |
20 | use comparison_handlers::*; | |
21 | use production_handlers::*; | |
22 | use report_handlers::*; | |
23 | use scan_handlers::*; | |
24 | use stack_presets::*; | |
25 | ||
26 | #[tokio::main] | |
27 | 31 | async fn main() -> Result<()> { |
28 | // Initialize tracing | |
29 | 31 | tracing_subscriber::fmt() |
30 | 31 | .with_env_filter(tracing_subscriber::EnvFilter::from_default_env()) |
31 | 31 | .init(); |
32 | ||
33 | 31 | let cli = Cli::parse(); |
34 | ||
35 | 31 | match cli.command { |
36 | 31 | Commands::Scan { |
37 | 31 |
|
38 | 31 |
|
39 | 31 |
|
40 | 31 |
|
41 | 31 |
|
42 | 31 |
|
43 | 31 |
|
44 | 31 |
|
45 | 31 |
|
46 | 31 |
|
47 | 31 |
|
48 | 31 |
|
49 | 31 |
|
50 | 31 |
|
51 | 31 |
|
52 | 31 | } => { |
53 | 31 | let |
54 | 10 | path, |
55 | 10 | db, |
56 | 10 | config_path: config, |
57 | 10 | profile, |
58 | 10 | show_progress: progress, |
59 | 10 | optimize, |
60 | 10 | streaming, |
61 | 10 | show_metrics: metrics, |
62 | 10 | incremental, |
63 | 10 | distributed, |
64 | 10 | custom_detectors, |
65 | 10 | cache_size, |
66 | 10 | batch_size, |
67 | 10 | max_file_size, |
68 | 10 | max_threads, |
69 | 10 | }; |
70 | 31 |
|
71 | 31 | } |
72 | 31 | Commands::History { |
73 | 31 | Commands::Report { |
74 | 31 | Commands::Compare { |
75 | 31 |
|
76 | 31 |
|
77 | 31 |
|
78 | 31 |
|
79 | 31 | } => |
80 | 31 | Commands::Completion { |
81 | 31 | Commands::Benchmark { |
82 | 31 | Commands::CustomDetectors { |
83 | 31 | Commands::Incremental { |
84 | 31 | Commands::Distributed { |
85 | 31 | Commands::ProductionCheck { |
86 | 31 |
|
87 | 31 |
|
88 | 31 |
|
89 | 31 |
|
90 | 31 |
|
91 | 31 |
|
92 | 31 | } => |
93 | 31 |
|
94 | 31 |
|
95 | 31 |
|
96 | 31 |
|
97 | 31 |
|
98 | 31 |
|
99 | 31 | ), |
100 | 31 | Commands::PreCommit { |
101 | 31 |
|
102 | 31 |
|
103 | 31 |
|
104 | 31 | } => |
105 | 31 | Commands::CiGate { |
106 | 31 |
|
107 | 31 |
|
108 | 31 |
|
109 | 31 |
|
110 | 31 |
|
111 | 31 | } => |
112 | 31 | Commands::Lang { |
113 | 31 |
|
114 | 31 |
|
115 | 31 |
|
116 | 31 |
|
117 | 31 | } => |
118 | 31 | Commands::Stack { |
119 | 31 | Commands::Watch { |
120 | 31 |
|
121 | 31 |
|
122 | 31 |
|
123 | 31 |
|
124 | 31 | } => |
125 | 31 | Commands::Git { |
126 | 31 | } |
127 | 31 | } |
Line | Count | Source |
1 | use crate::git_integration::GitIntegration; | |
2 | use anyhow::Result; | |
3 | use code_guardian_core::{AlertDetector, ConsoleLogDetector, DebuggerDetector}; | |
4 | use code_guardian_core::{DetectorFactory, Match, PatternDetector, Scanner}; | |
5 | use colored::*; | |
6 | use std::collections::HashMap; | |
7 | use std::fs; | |
8 | use std::path::PathBuf; | |
9 | use std::process; | |
10 | ||
/// Runs a production-readiness scan over `path` and reports issues grouped by severity.
///
/// `format` selects the report style ("json", "summary", or colored text by default).
/// A non-empty `severity_filter` restricts the report to the listed severities.
/// `output`, when set, writes the report to that file instead of stdout.
/// With `fail_on_critical` / `fail_on_high`, the process exits with status 1 when
/// issues of that severity remain — intended for CI/CD gating.
pub fn handle_production_check(
    path: PathBuf,
    format: String,
    fail_on_critical: bool,
    fail_on_high: bool,
    severity_filter: Vec<String>,
    output: Option<PathBuf>,
) -> Result<()> {
    println!(
        "๐ {} Production Readiness Check",
        "Code-Guardian".bold().blue()
    );
    println!("๐ Scanning: {}", path.display());

    // Create production-ready detectors
    let detectors = DetectorFactory::create_production_ready_detectors();
    let scanner = Scanner::new(detectors);

    // Perform the scan
    let matches = scanner.scan(&path)?;

    // Filter by severity if specified (empty filter means "keep everything")
    let filtered_matches = if severity_filter.is_empty() {
        matches
    } else {
        filter_by_severity(matches, &severity_filter)
    };

    // Count issues by severity
    let severity_counts = count_by_severity(&filtered_matches);

    // Generate output based on format
    let output_content = match format.as_str() {
        "json" => generate_json_output(&filtered_matches, &severity_counts)?,
        "summary" => generate_summary_output(&filtered_matches, &severity_counts),
        _ => generate_production_text_output(&filtered_matches, &severity_counts),
    };

    // Write to file if specified, otherwise print to stdout
    if let Some(output_path) = output {
        fs::write(&output_path, &output_content)?;
        println!("๐ Report saved to: {}", output_path.display());
    } else {
        println!("{}", output_content);
    }

    // Exit with appropriate code for CI/CD integration
    let critical_count = severity_counts.get("Critical").unwrap_or(&0);
    let high_count = severity_counts.get("High").unwrap_or(&0);

    if fail_on_critical && *critical_count > 0 {
        eprintln!(
            "โ Production check FAILED: {} critical issues found",
            critical_count
        );
        // Non-zero exit so CI treats the check as failed.
        process::exit(1);
    }

    if fail_on_high && *high_count > 0 {
        eprintln!(
            "โ ๏ธ Production check FAILED: {} high severity issues found",
            high_count
        );
        process::exit(1);
    }

    // Without the fail-on flags, remaining critical/high issues are only warned about.
    if *critical_count > 0 || *high_count > 0 {
        println!(
            "โ ๏ธ Production readiness: {} - Address critical and high severity issues",
            "NEEDS ATTENTION".yellow()
        );
    } else {
        println!("โ Production readiness: {}", "PASSED".green());
    }

    Ok(())
}
89 | ||
/// Runs the pre-commit scan over `path` and exits non-zero on critical issues.
///
/// `staged_only` limits the scan to git-staged files (falling back to a full
/// directory scan outside a git repo); `fast` uses only the three most
/// critical detectors instead of the full production set. High-severity
/// findings are printed as warnings; only critical findings fail the hook.
pub fn handle_pre_commit(path: PathBuf, staged_only: bool, fast: bool) -> Result<()> {
    println!("๐ง {} Pre-commit Check", "Code-Guardian".bold().cyan());

    let detectors: Vec<Box<dyn PatternDetector>> = if fast {
        // Fast mode: only critical issues
        vec![
            Box::new(DebuggerDetector),
            Box::new(ConsoleLogDetector),
            Box::new(AlertDetector),
        ]
    } else {
        DetectorFactory::create_production_ready_detectors()
    };

    let scanner = Scanner::new(detectors);

    let matches = if staged_only {
        // Check if we're in a git repository
        if !GitIntegration::is_git_repo(&path) {
            println!("โ ๏ธ Not in a git repository. Scanning entire directory instead.");
            scanner.scan(&path)?
        } else {
            // Get repo root and staged files
            let repo_root = GitIntegration::get_repo_root(&path)?;
            let staged_files = GitIntegration::get_staged_files(&repo_root)?;

            if staged_files.is_empty() {
                println!("โน๏ธ No staged files found. Nothing to scan.");
                return Ok(());
            }

            println!("๐ Scanning {} staged file(s)...", staged_files.len());
            if !fast {
                for file in &staged_files {
                    println!(" ๐ {}", file.display());
                }
            }

            // Scan only staged files
            // NOTE(review): this scans each file's whole parent directory and then
            // filters, so cost grows with tree size per staged file — O(files × tree).
            let mut all_matches = Vec::new();
            for file_path in staged_files {
                if file_path.is_file() {
                    // For now, use the directory scanner on each file's parent
                    // This is a workaround until we implement file-specific scanning
                    if let Some(parent) = file_path.parent() {
                        let file_matches = scanner.scan(parent)?;
                        // Filter matches to only include the specific file
                        let filtered_matches: Vec<_> = file_matches
                            .into_iter()
                            .filter(|m| m.file_path == file_path.to_string_lossy())
                            .collect();
                        all_matches.extend(filtered_matches);
                    }
                }
            }
            all_matches
        }
    } else {
        scanner.scan(&path)?
    };

    let severity_counts = count_by_severity(&matches);
    let critical_count = severity_counts.get("Critical").unwrap_or(&0);
    let high_count = severity_counts.get("High").unwrap_or(&0);

    if *critical_count > 0 {
        eprintln!(
            "โ Pre-commit check FAILED: {} critical issues",
            critical_count
        );
        for m in matches.iter().filter(|m| is_critical_severity(&m.pattern)) {
            eprintln!(" {} [{}] {}", m.file_path, m.pattern.red(), m.message);
        }
        // Abort the commit hook with a failing status.
        process::exit(1);
    }

    if *high_count > 0 {
        println!(
            "โ ๏ธ {} high severity issues found (warnings only)",
            high_count
        );
        for m in matches.iter().filter(|m| is_high_severity(&m.pattern)) {
            println!(" {} [{}] {}", m.file_path, m.pattern.yellow(), m.message);
        }
    }

    println!("โ Pre-commit check passed");
    Ok(())
}
180 | ||
/// Runs the CI/CD quality gate: scans `path`, emits a machine-readable JSON
/// report, and exits non-zero when critical/high counts exceed the thresholds.
///
/// `output`, when set, writes the JSON report to that file. `_config` is
/// currently unused (reserved for a future gate configuration file).
pub fn handle_ci_gate(
    path: PathBuf,
    _config: Option<PathBuf>,
    output: Option<PathBuf>,
    max_critical: u32,
    max_high: u32,
) -> Result<()> {
    println!("๐ฆ {} CI/CD Gate", "Code-Guardian".bold().green());

    let detectors = DetectorFactory::create_production_ready_detectors();
    let scanner = Scanner::new(detectors);
    let matches = scanner.scan(&path)?;

    let severity_counts = count_by_severity(&matches);
    let critical_count = *severity_counts.get("Critical").unwrap_or(&0) as u32;
    let high_count = *severity_counts.get("High").unwrap_or(&0) as u32;

    // Generate JSON report for CI/CD systems
    let report = serde_json::json!({
        "status": if critical_count <= max_critical && high_count <= max_high { "PASS" } else { "FAIL" },
        "summary": {
            "critical": critical_count,
            "high": high_count,
            "total": matches.len()
        },
        "thresholds": {
            "max_critical": max_critical,
            "max_high": max_high
        },
        "matches": matches.iter().map(|m| serde_json::json!({
            "file": m.file_path,
            "line": m.line_number,
            "column": m.column,
            "pattern": m.pattern,
            "message": m.message,
            "severity": get_severity_for_pattern(&m.pattern)
        })).collect::<Vec<_>>()
    });

    let json_output = serde_json::to_string_pretty(&report)?;

    if let Some(output_path) = output {
        fs::write(&output_path, &json_output)?;
        println!("๐ CI report saved to: {}", output_path.display());
    }

    // Print summary
    println!("๐ Results:");
    println!(" Critical: {}/{}", critical_count, max_critical);
    println!(" High: {}/{}", high_count, max_high);

    // Threshold breaches fail the gate with a non-zero exit for the CI runner.
    if critical_count > max_critical {
        eprintln!(
            "โ CI Gate FAILED: Too many critical issues ({} > {})",
            critical_count, max_critical
        );
        process::exit(1);
    }

    if high_count > max_high {
        eprintln!(
            "โ CI Gate FAILED: Too many high severity issues ({} > {})",
            high_count, max_high
        );
        process::exit(1);
    }

    println!("โ CI Gate PASSED");
    Ok(())
}
252 | ||
253 | /// Handle language-specific scanning | |
254 | 0 | pub fn handle_lang_scan( |
255 | 0 | languages: Vec<String>, |
256 | 0 | path: PathBuf, |
257 | 0 | format: String, |
258 | 0 | production: bool, |
259 | 0 | ) -> Result<()> { |
260 | 0 | println!( |
261 | 0 | "๐ {} Language-Specific Scan", |
262 | 0 | "Code-Guardian".bold().magenta() |
263 | ); | |
264 | 0 | println!("๐ฏ Languages: {}", languages.join(", ")); |
265 | ||
266 | 0 | let extensions = map_languages_to_extensions(&languages); |
267 | 0 | println!("๐ File extensions: {}", extensions.join(", ")); |
268 | ||
269 | 0 | let detectors = if production { |
270 | 0 | DetectorFactory::create_production_ready_detectors() |
271 | } else { | |
272 | 0 | DetectorFactory::create_comprehensive_detectors() |
273 | }; | |
274 | ||
275 | 0 | let scanner = Scanner::new(detectors); |
276 | 0 | let all_matches = scanner.scan(&path)?; |
277 | ||
278 | // Filter matches to only include specified language extensions | |
279 | 0 | let filtered_matches: Vec<Match> = all_matches |
280 | 0 | .into_iter() |
281 | 0 | .filter(|m| { |
282 | 0 | extensions |
283 | 0 | .iter() |
284 | 0 | .any(|ext| m.file_path.ends_with(&format!(".{}", ext))) |
285 | 0 | }) |
286 | 0 | .collect(); |
287 | ||
288 | 0 | let severity_counts = count_by_severity(&filtered_matches); |
289 | ||
290 | 0 | match format.as_str() { |
291 | 0 | "json" => { |
292 | 0 | let json_output = generate_json_output(&filtered_matches, &severity_counts)?; |
293 | 0 | println!("{}", json_output); |
294 | } | |
295 | 0 | "summary" => { |
296 | 0 | let summary = generate_summary_output(&filtered_matches, &severity_counts); |
297 | 0 | println!("{}", summary); |
298 | 0 | } |
299 | 0 | _ => { |
300 | 0 | let text_output = generate_production_text_output(&filtered_matches, &severity_counts); |
301 | 0 | println!("{}", text_output); |
302 | 0 | } |
303 | } | |
304 | ||
305 | 0 | Ok(()) |
306 | 0 | } |
307 | ||
308 | /// Handle file watching command | |
309 | 0 | pub fn handle_watch( |
310 | 0 | _path: PathBuf, |
311 | 0 | _include: Vec<String>, |
312 | 0 | _exclude: Vec<String>, |
313 | 0 | _delay: u64, |
314 | 0 | ) -> Result<()> { |
315 | 0 | println!("๐๏ธ {} File Watching", "Code-Guardian".bold().cyan()); |
316 | 0 | println!("โ ๏ธ File watching feature coming soon!"); |
317 | 0 | println!("๐ This will enable real-time scanning as you edit files"); |
318 | 0 | Ok(()) |
319 | 0 | } |
320 | ||
321 | // Helper functions | |
322 | ||
323 | 0 | fn filter_by_severity(matches: Vec<Match>, severity_filter: &[String]) -> Vec<Match> { |
324 | 0 | matches |
325 | 0 | .into_iter() |
326 | 0 | .filter(|m| { |
327 | 0 | let severity = get_severity_for_pattern(&m.pattern); |
328 | 0 | severity_filter.contains(&severity) |
329 | 0 | }) |
330 | 0 | .collect() |
331 | 0 | } |
332 | ||
333 | 2 | fn count_by_severity(matches: &[Match]) -> HashMap<String, usize> { |
334 | 2 | let mut counts = HashMap::new(); |
335 | 2 | for |
336 | 0 | let severity = get_severity_for_pattern(&m.pattern); |
337 | 0 | *counts.entry(severity).or_insert(0) += 1; |
338 | 0 | } |
339 | 2 | counts |
340 | 2 | } |
341 | ||
/// Maps a detector pattern name to its severity label.
///
/// Reconstructed from the partially elided coverage listing, pinned by the
/// unit tests: DEBUGGER is Critical; DEV/STAGING/CONSOLE_LOG/ALERT are High;
/// DEBUG/TEST/PHASE/PRINT/UNWRAP are Medium; anything else (e.g. TODO) is Low.
fn get_severity_for_pattern(pattern: &str) -> String {
    match pattern {
        "DEBUGGER" => "Critical",
        "DEV" | "STAGING" | "CONSOLE_LOG" | "ALERT" => "High",
        "DEBUG" | "TEST" | "PHASE" | "PRINT" | "UNWRAP" => "Medium",
        _ => "Low",
    }
    .to_string()
}
352 | ||
353 | 8 | fn is_critical_severity(pattern: &str) -> bool { |
354 | 8 |
|
355 | 8 | } |
356 | ||
357 | 12 | fn is_high_severity(pattern: &str) -> bool { |
358 | 12 |
|
359 | 12 | } |
360 | ||
/// Expands language names/aliases into the file extensions they cover.
/// Unknown inputs pass through unchanged so callers can supply raw extensions.
///
/// Reconstructed from the partially elided listing; the alias pairs
/// (javascript/typescript/python/rust) and the js->jsx, ts->tsx fan-out are
/// pinned by the unit tests below in this file.
fn map_languages_to_extensions(languages: &[String]) -> Vec<String> {
    let mut extensions = Vec::new();
    for lang in languages {
        match lang.as_str() {
            "js" | "javascript" => extensions.extend_from_slice(&["js", "jsx"]),
            "ts" | "typescript" => extensions.extend_from_slice(&["ts", "tsx"]),
            "py" | "python" => extensions.push("py"),
            "rs" | "rust" => extensions.push("rs"),
            "go" => extensions.push("go"),
            "java" => extensions.push("java"),
            "cs" | "csharp" => extensions.push("cs"),
            "php" => extensions.push("php"),
            "rb" | "ruby" => extensions.push("rb"),
            "kt" | "kotlin" => extensions.push("kt"),
            "swift" => extensions.push("swift"),
            "dart" => extensions.push("dart"),
            "cpp" | "c++" => extensions.extend_from_slice(&["cpp", "cxx", "cc"]),
            "c" => extensions.extend_from_slice(&["c", "h"]),
            "vue" => extensions.push("vue"),
            "svelte" => extensions.push("svelte"),
            _ => extensions.push(lang), // Pass through unknown extensions
        }
    }
    extensions.into_iter().map(String::from).collect()
}
386 | ||
387 | 0 | fn generate_json_output( |
388 | 0 | matches: &[Match], |
389 | 0 | severity_counts: &HashMap<String, usize>, |
390 | 0 | ) -> Result<String> { |
391 | 0 | let output = serde_json::json!({ |
392 | 0 | "summary": severity_counts, |
393 | 0 | "total": matches.len(), |
394 | 0 | "matches": matches |
395 | }); | |
396 | 0 | Ok(serde_json::to_string_pretty(&output)?) |
397 | 0 | } |
398 | ||
399 | 0 | fn generate_summary_output(matches: &[Match], severity_counts: &HashMap<String, usize>) -> String { |
400 | 0 | let mut output = String::new(); |
401 | 0 | output.push_str(&format!("๐ {} Summary\n", "Code-Guardian".bold())); |
402 | 0 | output.push_str(&format!("Total Issues: {}\n", matches.len())); |
403 | ||
404 | 0 | for (severity, count) in severity_counts { |
405 | 0 | let icon = match severity.as_str() { |
406 | 0 | "Critical" => "๐ด", |
407 | 0 | "High" => "๐ ", |
408 | 0 | "Medium" => "๐ก", |
409 | 0 | "Low" => "๐ข", |
410 | 0 | _ => "โช", |
411 | }; | |
412 | 0 | output.push_str(&format!("{} {}: {}\n", icon, severity, count)); |
413 | } | |
414 | 0 | output |
415 | 0 | } |
416 | ||
417 | 0 | fn generate_production_text_output( |
418 | 0 | matches: &[Match], |
419 | 0 | severity_counts: &HashMap<String, usize>, |
420 | 0 | ) -> String { |
421 | 0 | let mut output = String::new(); |
422 | ||
423 | 0 | output.push_str(&format!( |
424 | 0 | "๐ {} Production Readiness Report\n\n", |
425 | 0 | "Code-Guardian".bold().blue() |
426 | 0 | )); |
427 | ||
428 | // Group matches by severity | |
429 | 0 | let mut critical_issues = Vec::new(); |
430 | 0 | let mut high_issues = Vec::new(); |
431 | 0 | let mut medium_issues = Vec::new(); |
432 | 0 | let mut low_issues = Vec::new(); |
433 | ||
434 | 0 | for m in matches { |
435 | 0 | match get_severity_for_pattern(&m.pattern).as_str() { |
436 | 0 | "Critical" => critical_issues.push(m), |
437 | 0 | "High" => high_issues.push(m), |
438 | 0 | "Medium" => medium_issues.push(m), |
439 | 0 | "Low" => low_issues.push(m), |
440 | 0 | _ => {} |
441 | } | |
442 | } | |
443 | ||
444 | // Display issues by severity | |
445 | 0 | if !critical_issues.is_empty() { |
446 | 0 | output.push_str(&format!( |
447 | 0 | "๐ด {} ({}):\n", |
448 | 0 | "Critical Issues".red().bold(), |
449 | 0 | critical_issues.len() |
450 | 0 | )); |
451 | 0 | for issue in critical_issues { |
452 | 0 | output.push_str(&format!( |
453 | 0 | "โโโ {}:{} [{}] {}\n", |
454 | 0 | issue.file_path, |
455 | 0 | issue.line_number, |
456 | 0 | issue.pattern.red(), |
457 | 0 | issue.message |
458 | 0 | )); |
459 | 0 | } |
460 | 0 | output.push('\n'); |
461 | 0 | } |
462 | ||
463 | 0 | if !high_issues.is_empty() { |
464 | 0 | output.push_str(&format!( |
465 | 0 | "๐ {} ({}):\n", |
466 | 0 | "High Severity".yellow().bold(), |
467 | 0 | high_issues.len() |
468 | 0 | )); |
469 | 0 | for issue in high_issues { |
470 | 0 | output.push_str(&format!( |
471 | 0 | "โโโ {}:{} [{}] {}\n", |
472 | 0 | issue.file_path, |
473 | 0 | issue.line_number, |
474 | 0 | issue.pattern.yellow(), |
475 | 0 | issue.message |
476 | 0 | )); |
477 | 0 | } |
478 | 0 | output.push('\n'); |
479 | 0 | } |
480 | ||
481 | 0 | if !medium_issues.is_empty() { |
482 | 0 | output.push_str(&format!( |
483 | 0 | "๐ก {} ({}):\n", |
484 | 0 | "Medium Severity".cyan().bold(), |
485 | 0 | medium_issues.len() |
486 | 0 | )); |
487 | 0 | for issue in medium_issues { |
488 | 0 | output.push_str(&format!( |
489 | 0 | "โโโ {}:{} [{}] {}\n", |
490 | 0 | issue.file_path, |
491 | 0 | issue.line_number, |
492 | 0 | issue.pattern.cyan(), |
493 | 0 | issue.message |
494 | 0 | )); |
495 | 0 | } |
496 | 0 | output.push('\n'); |
497 | 0 | } |
498 | ||
499 | 0 | if !low_issues.is_empty() { |
500 | 0 | output.push_str(&format!( |
501 | 0 | "๐ข {} ({}):\n", |
502 | 0 | "Low Severity".green().bold(), |
503 | 0 | low_issues.len() |
504 | 0 | )); |
505 | 0 | for issue in low_issues.iter().take(5) { |
506 | 0 | // Limit low severity to first 5 |
507 | 0 | output.push_str(&format!( |
508 | 0 | "โโโ {}:{} [{}] {}\n", |
509 | 0 | issue.file_path, |
510 | 0 | issue.line_number, |
511 | 0 | issue.pattern.green(), |
512 | 0 | issue.message |
513 | 0 | )); |
514 | 0 | } |
515 | 0 | if low_issues.len() > 5 { |
516 | 0 | output.push_str(&format!( |
517 | 0 | "โโโ ... and {} more low severity issues\n", |
518 | 0 | low_issues.len() - 5 |
519 | 0 | )); |
520 | 0 | } |
521 | 0 | output.push('\n'); |
522 | 0 | } |
523 | ||
524 | // Summary | |
525 | 0 | output.push_str("๐ Summary:\n"); |
526 | 0 | output.push_str(&format!("โข Total Issues: {}\n", matches.len())); |
527 | 0 | output.push_str(&format!( |
528 | 0 | "โข Critical: {}\n", |
529 | 0 | severity_counts.get("Critical").unwrap_or(&0) |
530 | 0 | )); |
531 | 0 | output.push_str(&format!( |
532 | 0 | "โข High: {}\n", |
533 | 0 | severity_counts.get("High").unwrap_or(&0) |
534 | 0 | )); |
535 | 0 | output.push_str(&format!( |
536 | 0 | "โข Medium: {}\n", |
537 | 0 | severity_counts.get("Medium").unwrap_or(&0) |
538 | 0 | )); |
539 | 0 | output.push_str(&format!( |
540 | 0 | "โข Low: {}\n", |
541 | 0 | severity_counts.get("Low").unwrap_or(&0) |
542 | 0 | )); |
543 | ||
544 | 0 | output |
545 | 0 | } |
546 | ||
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_map_languages_to_extensions() {
        let languages = vec!["js".to_string(), "py".to_string(), "rs".to_string()];
        let extensions = map_languages_to_extensions(&languages);

        assert!(extensions.contains(&"js".to_string()));
        assert!(extensions.contains(&"jsx".to_string()));
        assert!(extensions.contains(&"py".to_string()));
        assert!(extensions.contains(&"rs".to_string()));
        assert_eq!(extensions.len(), 4); // js, jsx, py, rs
    }

    #[test]
    fn test_get_severity_for_pattern() {
        assert_eq!(get_severity_for_pattern("DEBUGGER"), "Critical");
        assert_eq!(get_severity_for_pattern("CONSOLE_LOG"), "High");
        assert_eq!(get_severity_for_pattern("PRINT"), "Medium");
        assert_eq!(get_severity_for_pattern("TODO"), "Low");
        assert_eq!(get_severity_for_pattern("UNKNOWN"), "Low");
    }

    #[test]
    fn test_is_critical_severity() {
        assert!(is_critical_severity("DEBUGGER"));
        assert!(!is_critical_severity("CONSOLE_LOG"));
        assert!(!is_critical_severity("PRINT"));
        assert!(!is_critical_severity("TODO"));
    }

    #[test]
    fn test_is_high_severity() {
        assert!(is_high_severity("DEV"));
        assert!(is_high_severity("STAGING"));
        assert!(is_high_severity("CONSOLE_LOG"));
        assert!(is_high_severity("ALERT"));
        assert!(!is_high_severity("DEBUGGER"));
        assert!(!is_high_severity("PRINT"));
    }

    #[test]
    fn test_count_by_severity_empty() {
        let matches = vec![];
        let counts = count_by_severity(&matches);
        assert!(counts.is_empty());
    }

    // Each language alias must expand to all of its expected extensions.
    #[test]
    fn test_language_mapping_comprehensive() {
        let test_cases = vec![
            ("javascript", vec!["js", "jsx"]),
            ("typescript", vec!["ts", "tsx"]),
            ("python", vec!["py"]),
            ("rust", vec!["rs"]),
            ("go", vec!["go"]),
        ];

        for (lang, expected_exts) in test_cases {
            let languages = vec![lang.to_string()];
            let extensions = map_languages_to_extensions(&languages);

            for expected_ext in expected_exts {
                assert!(
                    extensions.contains(&expected_ext.to_string()),
                    "Language '{}' should map to extension '{}'",
                    lang,
                    expected_ext
                );
            }
        }
    }
}
Line | Count | Source |
1 | use anyhow::Result; | |
2 | use code_guardian_output::formatters::{ | |
3 | CsvFormatter, Formatter, HtmlFormatter, JsonFormatter, MarkdownFormatter, TextFormatter, | |
4 | }; | |
5 | use code_guardian_storage::{ScanRepository, SqliteScanRepository}; | |
6 | use std::path::PathBuf; | |
7 | ||
8 | use crate::utils::get_db_path; | |
9 | ||
10 | 8 | pub fn handle_report(id: i64, format: String, db: Option<PathBuf>) -> Result<()> { |
11 | 8 | let |
12 | 7 | let db_path = get_db_path(db); |
13 | 7 | let repo = SqliteScanRepository::new(&db_path) |
14 | 7 | let scan = repo.get_scan(id) |
15 | 7 | match scan { |
16 | 6 | Some(scan) => { |
17 | 6 | println!("{}", formatter.format(&scan.matches)); |
18 | 6 | } |
19 | 1 | None => println!("Scan with ID {} not found.", id), |
20 | } | |
21 | 7 | Ok(()) |
22 | 8 | } |
23 | ||
24 | 10 | pub fn get_formatter(format: &str) -> Result<Box<dyn Formatter>> { |
25 | 10 | match format { |
26 | 10 | "text" => |
27 | 7 | "json" => |
28 | 4 | "csv" => |
29 | 3 | "markdown" => |
30 | 2 | "html" => |
31 | 1 | _ => Err(anyhow::anyhow!("Unsupported format: {}", format)), |
32 | } | |
33 | 10 | } |
Line | Count | Source |
1 | use anyhow::Result; | |
2 | use code_guardian_core::{ | |
3 | config::load_config, CustomDetectorManager, DistributedCoordinator, IncrementalScanner, | |
4 | OptimizedScanner, Scanner, StreamingScanner, WorkerConfig, | |
5 | }; | |
6 | use code_guardian_output::formatters::Formatter; | |
7 | use code_guardian_storage::{Scan, ScanRepository, SqliteScanRepository}; | |
8 | use indicatif::ProgressBar; | |
9 | use std::path::PathBuf; | |
10 | ||
11 | use crate::utils::get_detectors_from_profile; | |
12 | ||
/// Bundled options for the `scan` CLI command, consumed by `handle_scan`.
#[derive(Debug)]
pub struct ScanOptions {
    // Root directory to scan; handle_scan rejects missing or non-directory paths.
    pub path: PathBuf,
    // SQLite database location; a default path is derived when `None`.
    pub db: Option<PathBuf>,
    // Optional config file path (loaded via `config::load_config` — see imports).
    pub config_path: Option<PathBuf>,
    // Detector profile name, resolved through `get_detectors_from_profile`.
    pub profile: String,
    // Show a progress spinner during the scan.
    pub show_progress: bool,
    // Use the optimized (cached) scanner path.
    pub optimize: bool,
    // Use the streaming scanner for large codebases.
    pub streaming: bool,
    // Print scan metrics after the run.
    pub show_metrics: bool,
    // Only rescan files changed since the saved incremental state.
    pub incremental: bool,
    // Distribute the scan across simulated workers.
    pub distributed: bool,
    // Optional file of custom detector definitions to load.
    pub custom_detectors: Option<PathBuf>,
    // The following override the loaded config when `Some` (see handle_scan).
    pub cache_size: Option<usize>,
    pub batch_size: Option<usize>,
    pub max_file_size: Option<usize>,
    pub max_threads: Option<usize>,
}
31 | ||
32 | 24 | pub async fn handle_scan(options: ScanOptions) -> Result<()> { |
33 | 24 | if !options.path.exists() { |
34 | 2 | return Err(anyhow::anyhow!( |
35 | 2 | "Path '{}' does not exist", |
36 | 2 | options.path.display() |
37 | 2 | )); |
38 | 22 | } |
39 | 22 | if !options.path.is_dir() { |
40 | 0 | return Err(anyhow::anyhow!( |
41 | 0 | "Path '{}' is not a directory", |
42 | 0 | options.path.display() |
43 | 0 | )); |
44 | 22 | } |
45 | 22 | let |
46 | // Override config with CLI args if provided | |
47 | 21 | if let Some( |
48 | 2 | config.cache_size = val; |
49 | 19 | } |
50 | 21 | if let Some( |
51 | 2 | config.batch_size = val; |
52 | 19 | } |
53 | 21 | if let Some( |
54 | 2 | config.max_file_size = val; |
55 | 19 | } |
56 | 21 | if let Some( |
57 | 5 | config.max_threads = val; |
58 | 16 | } |
59 | 21 | let db_path = options |
60 | 21 | .db |
61 | 21 | .unwrap_or_else(|| |
62 | 21 | let mut repo = SqliteScanRepository::new(&db_path) |
63 | ||
64 | // Load custom detectors if specified | |
65 | 21 | let mut custom_detector_manager = CustomDetectorManager::new(); |
66 | 21 | if let Some( |
67 | 1 | custom_detector_manager.load_from_file(&custom_path) |
68 | 1 | println!("๐ Loaded custom detectors from {}", custom_path.display()); |
69 | 20 | } |
70 | ||
71 | // Create scanner based on profile | |
72 | 21 | let mut detectors = get_detectors_from_profile(&options.profile); |
73 | ||
74 | // Add custom detectors | |
75 | 21 | let custom_detectors_vec = custom_detector_manager.get_detectors(); |
76 | 21 | if !custom_detectors_vec.is_empty() { |
77 | 1 | detectors.extend(custom_detectors_vec); |
78 | 1 | println!( |
79 | 1 | "๐ง Added {} custom detectors", |
80 | 1 | detectors.len() - get_detectors_from_profile(&options.profile).len() |
81 | 1 | ); |
82 | 20 | } |
83 | ||
84 | 21 | let pb = if options.show_progress { |
85 | 2 | let pb = ProgressBar::new_spinner(); |
86 | 2 | pb.set_message("Scanning directory for patterns..."); |
87 | 2 | Some(pb) |
88 | } else { | |
89 | 19 | None |
90 | }; | |
91 | ||
92 | 21 | let (matches, scan_metrics) = if options.incremental { |
93 | // Use incremental scanning | |
94 | 4 | if let Some( |
95 | 0 | pb.set_message("Incremental scanning (only changed files)..."); |
96 | 4 | } |
97 | ||
98 | 4 | let state_file = db_path.with_extension("incremental"); |
99 | 4 | let mut incremental_scanner = IncrementalScanner::new(detectors, state_file) |
100 | 4 | let (matches, result) = incremental_scanner.scan_incremental(&options.path) |
101 | ||
102 | // Convert incremental result to scan metrics | |
103 | 4 | let metrics = code_guardian_core::ScanMetrics { |
104 | 4 | total_files_scanned: result.files_scanned, |
105 | 4 | total_lines_processed: 0, // Not tracked in incremental |
106 | 4 | total_matches_found: result.total_matches, |
107 | 4 | scan_duration_ms: result.scan_duration_ms, |
108 | 4 | cache_hits: result.files_skipped, |
109 | 4 | cache_misses: result.files_scanned, |
110 | 4 | }; |
111 | ||
112 | 4 | (matches, Some(metrics)) |
113 | 17 | } else if options.distributed { |
114 | // Use distributed scanning | |
115 | 1 | if let Some( |
116 | 0 | pb.set_message("Distributed scanning across multiple workers..."); |
117 | 1 | } |
118 | ||
119 | 1 | let mut coordinator = DistributedCoordinator::new(); |
120 | ||
121 | // Register simulated workers | |
122 | 5 | for |
123 | 4 | let worker_config = WorkerConfig { |
124 | 4 | worker_id: format!("worker_{}", i), |
125 | 4 | max_concurrent_units: 2, |
126 | 4 | supported_detectors: vec!["TODO".to_string(), "FIXME".to_string()], |
127 | 4 | cpu_cores: 2, |
128 | 4 | memory_limit_mb: 1024, |
129 | 4 | endpoint: None, |
130 | 4 | }; |
131 | 4 | coordinator.register_worker(worker_config); |
132 | 4 | } |
133 | ||
134 | // Register detectors with coordinator | |
135 | 2 | for (i, detector) in |
136 | 2 | coordinator.register_detector(format!("detector_{}", i), detector); |
137 | 2 | } |
138 | ||
139 | // Collect files | |
140 | 1 | let files: Vec<PathBuf> = ignore::WalkBuilder::new(&options.path) |
141 | 1 | .build() |
142 | 3 | . |
143 | 3 | entry.ok().and_then(|e| { |
144 | 3 | if e.file_type() |
145 | 2 | Some(e.path().to_path_buf()) |
146 | } else { | |
147 | 1 | None |
148 | } | |
149 | 3 | }) |
150 | 3 | }) |
151 | 1 | .collect(); |
152 | ||
153 | 1 | coordinator.create_work_units(files, config.batch_size) |
154 | 1 | let matches = coordinator.execute_distributed_scan().await |
155 | ||
156 | // Create basic metrics | |
157 | 1 | let metrics = code_guardian_core::ScanMetrics { |
158 | 1 | total_files_scanned: coordinator.get_statistics().total_files_processed, |
159 | 1 | total_lines_processed: 0, |
160 | 1 | total_matches_found: matches.len(), |
161 | 1 | scan_duration_ms: 100, // Placeholder |
162 | 1 | cache_hits: 0, |
163 | 1 | cache_misses: 0, |
164 | 1 | }; |
165 | ||
166 | 1 | (matches, Some(metrics)) |
167 | 16 | } else if options.streaming { |
168 | // Use streaming scanner for large codebases | |
169 | 2 | if let Some( |
170 | 1 | pb.set_message("Streaming scan of large codebase..."); |
171 | 1 | } |
172 | ||
173 | 2 | let streaming_scanner = StreamingScanner::new(detectors); |
174 | 2 | let mut all_matches = Vec::new(); |
175 | ||
176 | 2 | let metrics = streaming_scanner.scan_streaming(&options.path, |batch_matches| { |
177 | 2 | all_matches.extend(batch_matches); |
178 | 2 | Ok(()) |
179 | 2 | }) |
180 | ||
181 | 2 | (all_matches, Some(metrics)) |
182 | 14 | } else if options.optimize { |
183 | // Use optimized scanner | |
184 | 2 | if let Some( |
185 | 0 | pb.set_message("Optimized scanning with caching..."); |
186 | 2 | } |
187 | ||
188 | 2 | let optimized_scanner = OptimizedScanner::new(detectors).with_cache_size(config.cache_size); |
189 | 2 | let (matches, metrics) = optimized_scanner.scan_optimized(&options.path) |
190 | 2 | (matches, Some(metrics)) |
191 | } else { | |
192 | // Use standard scanner | |
193 | 12 | if let Some( |
194 | 1 | pb.set_message("Scanning directory for patterns..."); |
195 | 11 | } |
196 | ||
197 | 12 | let scanner = Scanner::new(detectors); |
198 | 12 | let matches = scanner.scan(&options.path) |
199 | 12 | (matches, None) |
200 | }; | |
201 | ||
202 | 21 | if let Some( |
203 | 2 | pb.finish_with_message("Scan completed."); |
204 | 19 | } |
205 | 21 | let timestamp = chrono::Utc::now().timestamp(); |
206 | 21 | let scan = Scan { |
207 | 21 | id: None, |
208 | 21 | timestamp, |
209 | 21 | root_path: options.path.to_string_lossy().to_string(), |
210 | 21 | matches: matches.clone(), |
211 | 21 | }; |
212 | 21 | let id = repo.save_scan(&scan) |
213 | 21 | println!("Scan saved with ID: {}", id); |
214 | ||
215 | // Show performance metrics if requested | |
216 | 21 | if options.show_metrics { |
217 | 2 | if let Some(metrics) = scan_metrics { |
218 | 2 | println!("\n๐ Performance Metrics:"); |
219 | 2 | println!(" Files scanned: {}", metrics.total_files_scanned); |
220 | 2 | println!(" Lines processed: {}", metrics.total_lines_processed); |
221 | 2 | println!(" Matches found: {}", metrics.total_matches_found); |
222 | 2 | println!(" Scan duration: {}ms", metrics.scan_duration_ms); |
223 | ||
224 | 2 | if metrics.cache_hits > 0 || metrics.cache_misses > 0 { |
225 | 1 | let hit_rate = |
226 | 1 | metrics.cache_hits as f64 / (metrics.cache_hits + metrics.cache_misses) as f64; |
227 | 1 | println!(" Cache hit rate: {:.1}%", hit_rate * 100.0); |
228 | 1 | } |
229 | ||
230 | 2 | let files_per_sec = |
231 | 2 | metrics.total_files_scanned as f64 / (metrics.scan_duration_ms as f64 / 1000.0); |
232 | 2 | let lines_per_sec = |
233 | 2 | metrics.total_lines_processed as f64 / (metrics.scan_duration_ms as f64 / 1000.0); |
234 | 2 | println!( |
235 | 2 | " Performance: {:.1} files/sec, {:.1} lines/sec", |
236 | files_per_sec, lines_per_sec | |
237 | ); | |
238 | 0 | } |
239 | 2 | println!(); |
240 | 19 | } |
241 | ||
242 | 21 | let formatter = code_guardian_output::formatters::TextFormatter; |
243 | 21 | println!("{}", formatter.format(&matches)); |
244 | 21 | Ok(()) |
245 | 24 | } |
Line | Count | Source |
1 | use anyhow::Result; | |
2 | ||
3 | use crate::cli_definitions::StackPreset; | |
4 | use crate::production_handlers::handle_lang_scan; | |
5 | ||
6 | /// Handle stack preset commands by mapping them to appropriate language configurations | |
7 | 0 | pub fn handle_stack_preset(preset: StackPreset) -> Result<()> { |
8 | 0 | match preset { |
9 | 0 | StackPreset::Web { path, production } => { |
10 | 0 | let languages = vec![ |
11 | 0 | "js".to_string(), |
12 | 0 | "ts".to_string(), |
13 | 0 | "jsx".to_string(), |
14 | 0 | "tsx".to_string(), |
15 | 0 | "vue".to_string(), |
16 | 0 | "svelte".to_string(), |
17 | ]; | |
18 | 0 | handle_lang_scan(languages, path, "text".to_string(), production) |
19 | } | |
20 | 0 | StackPreset::Backend { path, production } => { |
21 | 0 | let languages = vec![ |
22 | 0 | "py".to_string(), |
23 | 0 | "java".to_string(), |
24 | 0 | "go".to_string(), |
25 | 0 | "cs".to_string(), |
26 | 0 | "php".to_string(), |
27 | 0 | "rb".to_string(), |
28 | ]; | |
29 | 0 | handle_lang_scan(languages, path, "text".to_string(), production) |
30 | } | |
31 | 0 | StackPreset::Fullstack { path, production } => { |
32 | 0 | let languages = vec![ |
33 | 0 | "js".to_string(), |
34 | 0 | "ts".to_string(), |
35 | 0 | "py".to_string(), |
36 | 0 | "java".to_string(), |
37 | 0 | "go".to_string(), |
38 | 0 | "rs".to_string(), |
39 | ]; | |
40 | 0 | handle_lang_scan(languages, path, "text".to_string(), production) |
41 | } | |
42 | 0 | StackPreset::Mobile { path, production } => { |
43 | 0 | let languages = vec![ |
44 | 0 | "js".to_string(), |
45 | 0 | "ts".to_string(), |
46 | 0 | "swift".to_string(), |
47 | 0 | "kt".to_string(), |
48 | 0 | "dart".to_string(), |
49 | ]; | |
50 | 0 | handle_lang_scan(languages, path, "text".to_string(), production) |
51 | } | |
52 | 0 | StackPreset::Systems { path, production } => { |
53 | 0 | let languages = vec![ |
54 | 0 | "rs".to_string(), |
55 | 0 | "cpp".to_string(), |
56 | 0 | "c".to_string(), |
57 | 0 | "go".to_string(), |
58 | ]; | |
59 | 0 | handle_lang_scan(languages, path, "text".to_string(), production) |
60 | } | |
61 | } | |
62 | 0 | } |
Line | Count | Source |
1 | use code_guardian_core::{DetectorProfile, PatternDetector}; | |
2 | use std::path::PathBuf; | |
3 | ||
/// Get the database path, defaulting to "data/code-guardian.db" if not provided.
///
/// The default is only constructed when needed (lazy `unwrap_or_else`).
pub fn get_db_path(db: Option<PathBuf>) -> PathBuf {
    db.unwrap_or_else(|| PathBuf::from("data/code-guardian.db"))
}
8 | ||
9 | /// Get detectors based on the profile string. | |
10 | 48 | pub fn get_detectors_from_profile(profile: &str) -> Vec<Box<dyn PatternDetector>> { |
11 | 48 | match profile { |
12 | 48 | "basic" => |
13 | 23 | "comprehensive" => |
14 | 17 | "security" => |
15 | 12 | "performance" => |
16 | 6 | "rust" => |
17 | 2 | "llm-security" => |
18 | 2 | "llm-quality" => |
19 | 2 | "llm-comprehensive" => |
20 | 2 | "production-ready-llm" => |
21 | _ => { | |
22 | 2 | println!("Unknown profile '{}', using 'basic'", profile); |
23 | 2 | DetectorProfile::Basic.get_detectors() |
24 | } | |
25 | } | |
26 | 48 | } |
27 | ||
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::PathBuf;

    #[test]
    fn test_get_db_path_with_provided_path() {
        let custom_path = PathBuf::from("/custom/path/db.sqlite");
        let result = get_db_path(Some(custom_path.clone()));
        assert_eq!(result, custom_path);
    }

    #[test]
    fn test_get_db_path_with_none() {
        let result = get_db_path(None);
        assert_eq!(result, PathBuf::from("data/code-guardian.db"));
    }

    #[test]
    fn test_get_detectors_from_profile_basic() {
        let detectors = get_detectors_from_profile("basic");
        assert!(
            !detectors.is_empty(),
            "Basic profile should return detectors"
        );
    }

    #[test]
    fn test_get_detectors_from_profile_comprehensive() {
        let detectors = get_detectors_from_profile("comprehensive");
        assert!(
            !detectors.is_empty(),
            "Comprehensive profile should return detectors"
        );

        // Comprehensive should have more detectors than basic
        let basic_detectors = get_detectors_from_profile("basic");
        assert!(
            detectors.len() >= basic_detectors.len(),
            "Comprehensive should have at least as many detectors as basic"
        );
    }

    #[test]
    fn test_get_detectors_from_profile_security() {
        let detectors = get_detectors_from_profile("security");
        assert!(
            !detectors.is_empty(),
            "Security profile should return detectors"
        );
    }

    #[test]
    fn test_get_detectors_from_profile_performance() {
        let detectors = get_detectors_from_profile("performance");
        assert!(
            !detectors.is_empty(),
            "Performance profile should return detectors"
        );
    }

    #[test]
    fn test_get_detectors_from_profile_rust() {
        let detectors = get_detectors_from_profile("rust");
        assert!(
            !detectors.is_empty(),
            "Rust profile should return detectors"
        );
    }

    #[test]
    fn test_get_detectors_from_profile_unknown() {
        // Unknown names fall back to the basic profile (a warning goes to stdout).
        let detectors = get_detectors_from_profile("unknown_profile");
        assert!(
            !detectors.is_empty(),
            "Unknown profile should fallback to basic detectors"
        );

        // Should fallback to basic profile
        let basic_detectors = get_detectors_from_profile("basic");
        assert_eq!(
            detectors.len(),
            basic_detectors.len(),
            "Unknown profile should return same as basic profile"
        );
    }

    #[test]
    fn test_all_profiles_return_valid_detectors() {
        let profiles = ["basic", "comprehensive", "security", "performance", "rust"];

        // Loop header reconstructed (it was truncated in the coverage listing).
        for profile in profiles {
            let detectors = get_detectors_from_profile(profile);
            assert!(
                !detectors.is_empty(),
                "Profile '{}' should return at least one detector",
                profile
            );
        }
    }
}
Line | Count | Source |
1 | use dashmap::DashMap; | |
2 | ||
/// Trait for caching analysis results.
/// Key is typically a file path or module identifier.
/// Value is the serialized analysis result.
pub trait Cache<K, V>: Send + Sync
where
    K: Eq + std::hash::Hash + Clone + Send + Sync,
    V: Clone + Send + Sync,
{
    /// Look up `key`, returning a clone of the stored value if present.
    fn get(&self, key: &K) -> Option<V>;
    /// Insert `value` under `key`, replacing any previous entry.
    fn set(&self, key: K, value: V);
    /// Remove all entries from the cache.
    fn clear(&self);
}
15 | ||
/// In-memory cache using DashMap for concurrent access.
pub struct InMemoryCache<K, V> {
    // Concurrent hash map; allows insert/clear through `&self` without an
    // external lock.
    map: DashMap<K, V>,
}
20 | ||
21 | impl<K, V> InMemoryCache<K, V> | |
22 | where | |
23 | K: Eq + std::hash::Hash + Clone + Send + Sync, | |
24 | V: Clone + Send + Sync, | |
25 | { | |
26 | 0 | pub fn new() -> Self { |
27 | 0 | Self { |
28 | 0 | map: DashMap::new(), |
29 | 0 | } |
30 | 0 | } |
31 | } | |
32 | ||
33 | impl<K, V> Cache<K, V> for InMemoryCache<K, V> | |
34 | where | |
35 | K: Eq + std::hash::Hash + Clone + Send + Sync, | |
36 | V: Clone + Send + Sync, | |
37 | { | |
38 | 0 | fn get(&self, key: &K) -> Option<V> { |
39 | 0 | self.map.get(key).map(|value| value.clone()) |
40 | 0 | } |
41 | ||
42 | 0 | fn set(&self, key: K, value: V) { |
43 | 0 | self.map.insert(key, value); |
44 | 0 | } |
45 | ||
46 | 0 | fn clear(&self) { |
47 | 0 | self.map.clear(); |
48 | 0 | } |
49 | } | |
50 | ||
51 | impl<K, V> Default for InMemoryCache<K, V> | |
52 | where | |
53 | K: Eq + std::hash::Hash + Clone + Send + Sync, | |
54 | V: Clone + Send + Sync, | |
55 | { | |
56 | 0 | fn default() -> Self { |
57 | 0 | Self::new() |
58 | 0 | } |
59 | } | |
60 | ||
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_in_memory_cache_basic_operations() {
        let cache = InMemoryCache::<String, String>::new();
        let k1 = "key1".to_string();
        let k2 = "key2".to_string();

        // A fresh cache yields no hits.
        assert!(cache.get(&k1).is_none());

        // Insert then read back.
        cache.set(k1.clone(), "value1".to_string());
        assert_eq!(cache.get(&k1), Some("value1".to_string()));

        // Overwriting replaces the previous value.
        cache.set(k1.clone(), "value2".to_string());
        assert_eq!(cache.get(&k1), Some("value2".to_string()));

        // Entries under distinct keys are independent.
        cache.set(k2.clone(), "value3".to_string());
        assert_eq!(cache.get(&k1), Some("value2".to_string()));
        assert_eq!(cache.get(&k2), Some("value3".to_string()));
    }

    #[test]
    fn test_cache_clear() {
        let cache = InMemoryCache::<String, String>::new();
        for (k, v) in [("key1", "value1"), ("key2", "value2")] {
            cache.set(k.to_string(), v.to_string());
        }

        assert!(cache.get(&"key1".to_string()).is_some());
        assert!(cache.get(&"key2".to_string()).is_some());

        cache.clear();

        assert!(cache.get(&"key1".to_string()).is_none());
        assert!(cache.get(&"key2".to_string()).is_none());
    }

    #[test]
    fn test_cache_with_different_types() {
        let cache = InMemoryCache::<i32, Vec<String>>::new();

        let items = vec!["item1".to_string(), "item2".to_string()];
        cache.set(42, items.clone());

        assert_eq!(cache.get(&42), Some(items));
        assert!(cache.get(&43).is_none());
    }

    #[test]
    fn test_cache_default() {
        let cache: InMemoryCache<String, i32> = Default::default();
        assert!(cache.get(&"test".to_string()).is_none());

        cache.set("test".to_string(), 100);
        assert_eq!(cache.get(&"test".to_string()), Some(100));
    }

    #[test]
    fn test_cache_concurrent_access() {
        use std::sync::Arc;
        use std::thread;

        let cache = Arc::new(InMemoryCache::<String, String>::new());

        // Ten writer threads, each verifying its own round-trip.
        let handles: Vec<_> = (0..10)
            .map(|i| {
                let shared = Arc::clone(&cache);
                thread::spawn(move || {
                    let key = format!("key_{}", i);
                    let value = format!("value_{}", i);
                    shared.set(key.clone(), value.clone());
                    assert_eq!(shared.get(&key), Some(value));
                })
            })
            .collect();

        for handle in handles {
            handle.join().unwrap();
        }

        // All writes are visible once the threads have joined.
        for i in 0..10 {
            let key = format!("key_{}", i);
            let expected = format!("value_{}", i);
            assert_eq!(cache.get(&key), Some(expected));
        }
    }
}
Line | Count | Source |
1 | use serde::{Deserialize, Serialize}; | |
2 | use std::path::Path; | |
3 | ||
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct Config {
    /// Glob patterns selecting which files to scan (default: "*.rs", "*.toml").
    pub scan_patterns: Vec<String>,
    /// Report formats to emit (default: "json").
    pub output_formats: Vec<String>,
    /// Location of the scan database file (default: "code_guardian.db").
    pub database_path: String,
    /// Worker thread count; defaults to the number of logical CPUs.
    pub max_threads: usize,
    /// Cache capacity in entries (default: 50_000).
    pub cache_size: usize,
    /// Number of items processed per batch (default: 100).
    pub batch_size: usize,
    /// Largest file size to scan, in bytes (default: 10 MB).
    pub max_file_size: usize,
}
14 | ||
15 | impl Default for Config { | |
16 | 0 | fn default() -> Self { |
17 | 0 | Self { |
18 | 0 | scan_patterns: vec!["*.rs".to_string(), "*.toml".to_string()], |
19 | 0 | output_formats: vec!["json".to_string()], |
20 | 0 | database_path: "code_guardian.db".to_string(), |
21 | 0 | max_threads: num_cpus::get(), |
22 | 0 | cache_size: 50000, |
23 | 0 | batch_size: 100, |
24 | 0 | max_file_size: 10 * 1024 * 1024, // 10MB |
25 | 0 | } |
26 | 0 | } |
27 | } | |
28 | ||
29 | 22 | pub fn load_config<P: AsRef<Path>>(path: Option<P>) -> anyhow::Result<Config> { |
30 | 22 | let mut builder = config::Config::builder(); |
31 | ||
32 | // Add default values | |
33 | 22 | builder = builder.set_default("scan_patterns", vec!["*.rs", "*.toml"]) |
34 | 22 | builder = builder.set_default("output_formats", vec!["json"]) |
35 | 22 | builder = builder.set_default("database_path", "code_guardian.db") |
36 | 22 | builder = builder.set_default("max_threads", num_cpus::get() as i64) |
37 | 22 | builder = builder.set_default("cache_size", 50000i64) |
38 | 22 | builder = builder.set_default("batch_size", 100i64) |
39 | 22 | builder = builder.set_default("max_file_size", (10 * 1024 * 1024) as i64) |
40 | ||
41 | // Add file source if provided | |
42 | 22 | if let Some( |
43 | 2 | let path = path.as_ref(); |
44 | 2 | if path.exists() { |
45 | 2 | let extension = path.extension().and_then(|s| s.to_str()).unwrap_or(""); |
46 | 2 | match extension { |
47 | 2 | "toml" => { |
48 | 2 | builder = builder.add_source(config::File::with_name(path.to_str().unwrap())); |
49 | 2 | } |
50 | 0 | "json" => { |
51 | 0 | builder = builder.add_source(config::File::with_name(path.to_str().unwrap())); |
52 | 0 | } |
53 | _ => { | |
54 | 0 | return Err(anyhow::anyhow!( |
55 | 0 | "Unsupported config file format: {}", |
56 | 0 | extension |
57 | 0 | )) |
58 | } | |
59 | } | |
60 | 0 | } |
61 | 20 | } |
62 | ||
63 | 22 | let |
64 | 21 | let parsed: Config = config.try_deserialize() |
65 | 21 | Ok(parsed) |
66 | 22 | } |
67 | ||
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::TempDir;

    /// Write `contents` to `name` inside a fresh temp dir. The returned
    /// `TempDir` must stay alive for the path to remain valid.
    fn write_temp_config(name: &str, contents: &str) -> (TempDir, std::path::PathBuf) {
        let dir = TempDir::new().unwrap();
        let path = dir.path().join(name);
        fs::write(&path, contents).unwrap();
        (dir, path)
    }

    #[test]
    fn test_default_config() {
        let config = Config::default();
        assert!(!config.scan_patterns.is_empty());
        assert!(!config.output_formats.is_empty());
        assert!(!config.database_path.is_empty());
        assert!(config.max_threads > 0);
        assert_eq!(config.cache_size, 50000);
        assert_eq!(config.batch_size, 100);
        assert_eq!(config.max_file_size, 10 * 1024 * 1024);
    }

    #[test]
    fn test_load_config_toml() {
        let toml_content = r#"
scan_patterns = ["*.rs", "*.py"]
output_formats = ["json", "csv"]
database_path = "test.db"
max_threads = 4
cache_size = 100000
batch_size = 200
max_file_size = 20971520
"#;
        let (_dir, config_path) = write_temp_config("config.toml", toml_content);

        let config = load_config(Some(&config_path)).unwrap();
        assert_eq!(config.scan_patterns, vec!["*.rs", "*.py"]);
        assert_eq!(config.output_formats, vec!["json", "csv"]);
        assert_eq!(config.database_path, "test.db");
        assert_eq!(config.max_threads, 4);
        assert_eq!(config.cache_size, 100000);
        assert_eq!(config.batch_size, 200);
        assert_eq!(config.max_file_size, 20971520);
    }

    #[test]
    fn test_load_config_json() {
        let json_content = r#"{
            "scan_patterns": ["*.js", "*.ts"],
            "output_formats": ["html"],
            "database_path": "data.db",
            "max_threads": 8,
            "cache_size": 75000,
            "batch_size": 150,
            "max_file_size": 15728640
        }"#;
        let (_dir, config_path) = write_temp_config("config.json", json_content);

        let config = load_config(Some(&config_path)).unwrap();
        assert_eq!(config.scan_patterns, vec!["*.js", "*.ts"]);
        assert_eq!(config.output_formats, vec!["html"]);
        assert_eq!(config.database_path, "data.db");
        assert_eq!(config.max_threads, 8);
        assert_eq!(config.cache_size, 75000);
        assert_eq!(config.batch_size, 150);
        assert_eq!(config.max_file_size, 15728640);
    }

    #[test]
    fn test_load_config_no_file() {
        // Without a file every field should match the hard-coded defaults.
        let config = load_config::<&str>(None).unwrap();
        let default = Config::default();
        assert_eq!(config.scan_patterns, default.scan_patterns);
        assert_eq!(config.output_formats, default.output_formats);
        assert_eq!(config.database_path, default.database_path);
        assert_eq!(config.max_threads, default.max_threads);
        assert_eq!(config.cache_size, default.cache_size);
        assert_eq!(config.batch_size, default.batch_size);
        assert_eq!(config.max_file_size, default.max_file_size);
    }

    #[test]
    fn test_load_config_unsupported_format() {
        let (_dir, config_path) = write_temp_config("config.txt", "invalid");
        assert!(load_config(Some(&config_path)).is_err());
    }
}
Line | Count | Source |
1 | use crate::{Match, PatternDetector, Severity}; | |
2 | use anyhow::Result; | |
3 | use regex::Regex; | |
4 | use serde::{Deserialize, Serialize}; | |
5 | use std::collections::HashMap; | |
6 | use std::path::Path; | |
7 | ||
/// Configuration for a custom detector
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CustomDetectorConfig {
    /// Unique detector name; also used as the `pattern` label on emitted matches.
    pub name: String,
    /// Human-readable description of what the detector finds.
    pub description: String,
    /// Regular expression matched against file contents.
    pub pattern: String,
    pub file_extensions: Vec<String>, // Empty = all files
    /// When false, the regex is compiled case-insensitively.
    pub case_sensitive: bool,
    /// Enables the regex multi-line flag (`^`/`$` match at line boundaries).
    pub multiline: bool,
    pub capture_groups: Vec<String>, // Named capture groups
    /// Severity assigned to matches produced by this detector.
    pub severity: Severity,
    /// Category used to organize detectors in listings.
    pub category: DetectorCategory,
    /// Example snippets this detector is expected to match.
    pub examples: Vec<String>,
    /// Disabled detectors produce no matches and are filtered from `get_detectors`.
    pub enabled: bool,
}
23 | ||
/// Categories for organizing custom detectors
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub enum DetectorCategory {
    CodeQuality,
    Security,
    Performance,
    Documentation,
    Testing,
    Deprecated,
    /// Free-form category name for anything not covered by the fixed variants.
    Custom(String),
}
35 | ||
/// A custom pattern detector built from configuration
pub struct CustomDetector {
    // Original configuration; exposed read-only via `config()`.
    config: CustomDetectorConfig,
    // Regex compiled from `config.pattern` with the configured flags.
    regex: Regex,
}
41 | ||
42 | impl Clone for CustomDetector { | |
43 | 4 | fn clone(&self) -> Self { |
44 | 4 | Self::new(self.config.clone()).unwrap() |
45 | 4 | } |
46 | } | |
47 | ||
48 | impl CustomDetector { | |
49 | /// Create a new custom detector from configuration | |
50 | 20 | pub fn new(config: CustomDetectorConfig) -> Result<Self> { |
51 | 20 | let pattern = config.pattern.clone(); |
52 | ||
53 | // Build regex flags | |
54 | 20 | let mut regex_flags = regex::RegexBuilder::new(&pattern); |
55 | 20 | regex_flags.case_insensitive(!config.case_sensitive); |
56 | 20 | regex_flags.multi_line(config.multiline); |
57 | ||
58 | 20 | let regex = regex_flags |
59 | 20 | .build() |
60 | 20 | .map_err(|e| anyhow::anyhow!( |
61 | ||
62 | 20 | Ok(Self { config, regex }) |
63 | 20 | } |
64 | ||
65 | /// Get detector configuration | |
66 | 16 | pub fn config(&self) -> &CustomDetectorConfig { |
67 | 16 | &self.config |
68 | 16 | } |
69 | ||
70 | /// Check if this detector should process the given file | |
71 | 5 | fn should_process_file(&self, file_path: &Path) -> bool { |
72 | 5 | if self.config.file_extensions.is_empty() { |
73 | 1 | return true; // Process all files |
74 | 4 | } |
75 | ||
76 | 4 | if let Some(ext) = file_path.extension().and_then(|s| s.to_str()) { |
77 | 4 | self.config |
78 | 4 | .file_extensions |
79 | 4 | .iter() |
80 | 6 | . |
81 | } else { | |
82 | 0 | false |
83 | } | |
84 | 5 | } |
85 | } | |
86 | ||
87 | impl PatternDetector for CustomDetector { | |
88 | 5 | fn detect(&self, content: &str, file_path: &Path) -> Vec<Match> { |
89 | 5 | if !self.config.enabled || !self.should_process_file(file_path) { |
90 | 4 | return Vec::new(); |
91 | 1 | } |
92 | ||
93 | 1 | let mut matches = Vec::new(); |
94 | ||
95 | 1 | for |
96 | 0 | if let Some(full_match) = cap.get(0) { |
97 | // Find line and column | |
98 | 0 | let (line_number, column) = find_line_column(content, full_match.start()); |
99 | ||
100 | // Extract message from capture groups or use full match | |
101 | 0 | let message = if !self.config.capture_groups.is_empty() { |
102 | 0 | self.extract_message_from_groups(&cap) |
103 | } else { | |
104 | 0 | full_match.as_str().trim().to_string() |
105 | }; | |
106 | ||
107 | 0 | matches.push(Match { |
108 | 0 | file_path: file_path.to_string_lossy().to_string(), |
109 | 0 | line_number, |
110 | 0 | column, |
111 | 0 | pattern: self.config.name.clone(), |
112 | 0 | message: format!("{}: {}", self.config.name, message), |
113 | 0 | }); |
114 | 0 | } |
115 | } | |
116 | ||
117 | 1 | matches |
118 | 5 | } |
119 | } | |
120 | ||
121 | impl CustomDetector { | |
122 | 0 | fn extract_message_from_groups(&self, cap: ®ex::Captures) -> String { |
123 | 0 | let mut parts = Vec::new(); |
124 | ||
125 | 0 | for group_name in &self.config.capture_groups { |
126 | 0 | if let Some(group_match) = cap.name(group_name) { |
127 | 0 | parts.push(format!("{}={}", group_name, group_match.as_str())); |
128 | 0 | } |
129 | } | |
130 | ||
131 | 0 | if parts.is_empty() { |
132 | 0 | cap.get(0) |
133 | 0 | .map_or("".to_string(), |m| m.as_str().to_string()) |
134 | } else { | |
135 | 0 | parts.join(", ") |
136 | } | |
137 | 0 | } |
138 | } | |
139 | ||
/// Manager for custom detectors
pub struct CustomDetectorManager {
    // Detectors keyed by their unique name; inserting under an existing
    // name replaces the previous detector.
    detectors: HashMap<String, CustomDetector>,
    // Path of the most recently loaded config file, if any.
    config_file: Option<std::path::PathBuf>,
}
145 | ||
146 | impl CustomDetectorManager { | |
147 | 26 | pub fn new() -> Self { |
148 | 26 | Self { |
149 | 26 | detectors: HashMap::new(), |
150 | 26 | config_file: None, |
151 | 26 | } |
152 | 26 | } |
153 | ||
154 | /// Load detectors from configuration file | |
155 | 3 | pub fn load_from_file<P: AsRef<Path>>(&mut self, config_file: P) -> Result<()> { |
156 | 3 | let config_file = config_file.as_ref(); |
157 | 3 | let content = std::fs::read_to_string(config_file) |
158 | ||
159 | 3 | let configs: Vec<CustomDetectorConfig> = |
160 | 3 | match config_file.extension().and_then(|s| s.to_str()) { |
161 | 3 | Some("json") => serde_json::from_str(&content) |
162 | 0 | Some("yaml") | Some("yml") => serde_yaml::from_str(&content)?, |
163 | 0 | Some("toml") => toml::from_str(&content)?, |
164 | 0 | _ => return Err(anyhow::anyhow!("Unsupported config file format")), |
165 | }; | |
166 | ||
167 | 10 | for |
168 | 7 | let detector = CustomDetector::new(config.clone()) |
169 | 7 | self.detectors.insert(config.name.clone(), detector); |
170 | } | |
171 | ||
172 | 3 | self.config_file = Some(config_file.to_path_buf()); |
173 | 3 | println!( |
174 | 3 | "๐ Loaded {} custom detectors from {}", |
175 | 3 | self.detectors.len(), |
176 | 3 | config_file.display() |
177 | ); | |
178 | ||
179 | 3 | Ok(()) |
180 | 3 | } |
181 | ||
182 | /// Save detectors to configuration file | |
183 | 3 | pub fn save_to_file<P: AsRef<Path>>(&self, config_file: P) -> Result<()> { |
184 | 3 | let configs: Vec<CustomDetectorConfig> = self |
185 | 3 | .detectors |
186 | 3 | .values() |
187 | 9 | . |
188 | 3 | .collect(); |
189 | ||
190 | 3 | let config_file = config_file.as_ref(); |
191 | 3 | let content = match config_file.extension().and_then(|s| s.to_str()) { |
192 | 3 | Some("json") => serde_json::to_string_pretty(&configs) |
193 | 0 | Some("yaml") | Some("yml") => serde_yaml::to_string(&configs)?, |
194 | 0 | Some("toml") => toml::to_string_pretty(&configs)?, |
195 | 0 | _ => return Err(anyhow::anyhow!("Unsupported config file format")), |
196 | }; | |
197 | ||
198 | 3 | std::fs::write(config_file, content) |
199 | 3 | println!( |
200 | 3 | "๐พ Saved {} custom detectors to {}", |
201 | 3 | configs.len(), |
202 | 3 | config_file.display() |
203 | ); | |
204 | ||
205 | 3 | Ok(()) |
206 | 3 | } |
207 | ||
208 | /// Add a new custom detector | |
209 | 9 | pub fn add_detector(&mut self, config: CustomDetectorConfig) -> Result<()> { |
210 | 9 | let name = config.name.clone(); |
211 | 9 | let detector = CustomDetector::new(config) |
212 | 9 | self.detectors.insert(name.clone(), detector); |
213 | 9 | println!("โ Added custom detector: {}", name); |
214 | 9 | Ok(()) |
215 | 9 | } |
216 | ||
217 | /// Remove a custom detector | |
218 | 0 | pub fn remove_detector(&mut self, name: &str) -> bool { |
219 | 0 | if self.detectors.remove(name).is_some() { |
220 | 0 | println!("โ Removed custom detector: {}", name); |
221 | 0 | true |
222 | } else { | |
223 | 0 | false |
224 | } | |
225 | 0 | } |
226 | ||
227 | /// Get all custom detectors as PatternDetector trait objects | |
228 | 22 | pub fn get_detectors(&self) -> Vec<Box<dyn PatternDetector>> { |
229 | 22 | self.detectors |
230 | 22 | .values() |
231 | 22 | .filter(|d| |
232 | 22 | .map(|d| |
233 | 22 | .collect() |
234 | 22 | } |
235 | ||
236 | /// List all detector configurations | |
237 | 1 | pub fn list_detectors(&self) -> Vec<&CustomDetectorConfig> { |
238 | 3 |
|
239 | 1 | } |
240 | ||
241 | /// Enable/disable a detector | |
242 | 0 | pub fn set_detector_enabled(&mut self, name: &str, enabled: bool) -> Result<()> { |
243 | 0 | if let Some(detector) = self.detectors.get_mut(name) { |
244 | // Note: We'd need to modify CustomDetector to allow config mutation | |
245 | // For now, we'll recreate the detector with updated config | |
246 | 0 | let mut config = detector.config().clone(); |
247 | 0 | config.enabled = enabled; |
248 | 0 | let new_detector = CustomDetector::new(config)?; |
249 | 0 | self.detectors.insert(name.to_string(), new_detector); |
250 | 0 | println!( |
251 | 0 | "๐ {} detector: {}", |
252 | 0 | if enabled { "Enabled" } else { "Disabled" }, |
253 | name | |
254 | ); | |
255 | 0 | Ok(()) |
256 | } else { | |
257 | 0 | Err(anyhow::anyhow!("Detector '{}' not found", name)) |
258 | } | |
259 | 0 | } |
260 | ||
261 | /// Create some example detectors | |
262 | 3 | pub fn create_examples(&mut self) -> Result<()> { |
263 | 3 | let examples = vec![ |
264 | 3 | CustomDetectorConfig { |
265 | 3 | name: "SQL_INJECTION".to_string(), |
266 | 3 | description: "Detect potential SQL injection vulnerabilities".to_string(), |
267 | 3 | pattern: r#"(?i)(query|execute)\s*\(\s*["']\s*SELECT.*\+.*["']\s*\)"#.to_string(), |
268 | 3 | file_extensions: vec!["py".to_string(), "js".to_string(), "php".to_string()], |
269 | 3 | case_sensitive: false, |
270 | 3 | multiline: false, |
271 | 3 | capture_groups: vec![], |
272 | 3 | severity: Severity::Critical, |
273 | 3 | category: DetectorCategory::Security, |
274 | 3 | examples: vec![r#"query("SELECT * FROM users WHERE id = " + user_id)"#.to_string()], |
275 | 3 | enabled: true, |
276 | 3 | }, |
277 | 3 | CustomDetectorConfig { |
278 | 3 | name: "HARDCODED_PASSWORD".to_string(), |
279 | 3 | description: "Detect hardcoded passwords and secrets".to_string(), |
280 | 3 | pattern: r#"(?i)(password|secret|key|token)\s*[=:]\s*["'][^"']{8,}["']"# |
281 | 3 | .to_string(), |
282 | 3 | file_extensions: vec![], |
283 | 3 | case_sensitive: false, |
284 | 3 | multiline: false, |
285 | 3 | capture_groups: vec![], |
286 | 3 | severity: Severity::High, |
287 | 3 | category: DetectorCategory::Security, |
288 | 3 | examples: vec![r#"password = "secretpassword123""#.to_string()], |
289 | 3 | enabled: true, |
290 | 3 | }, |
291 | 3 | CustomDetectorConfig { |
292 | 3 | name: "LARGE_FUNCTION".to_string(), |
293 | 3 | description: "Detect functions that might be too large".to_string(), |
294 | 3 | pattern: r#"fn\s+\w+[^{]*\{(?:[^{}]*\{[^{}]*\})*[^{}]{500,}\}"#.to_string(), |
295 | 3 | file_extensions: vec!["rs".to_string()], |
296 | 3 | case_sensitive: true, |
297 | 3 | multiline: true, |
298 | 3 | capture_groups: vec![], |
299 | 3 | severity: Severity::Medium, |
300 | 3 | category: DetectorCategory::CodeQuality, |
301 | 3 | examples: vec!["Functions with more than 500 characters in body".to_string()], |
302 | 3 | enabled: true, |
303 | 3 | }, |
304 | ]; | |
305 | ||
306 | 12 | for |
307 | 9 | self.add_detector(config) |
308 | } | |
309 | ||
310 | 3 | Ok(()) |
311 | 3 | } |
312 | } | |
313 | ||
314 | impl Default for CustomDetectorManager { | |
315 | 0 | fn default() -> Self { |
316 | 0 | Self::new() |
317 | 0 | } |
318 | } | |
319 | ||
/// Helper function to find line and column from byte offset.
///
/// Both coordinates are 1-based. The column counts characters (not bytes)
/// since the line start. An `offset` at or past the end of `content` yields
/// the position just after the last character.
fn find_line_column(content: &str, offset: usize) -> (usize, usize) {
    content
        .char_indices()
        .take_while(|&(byte_idx, _)| byte_idx < offset)
        .fold((1, 1), |(line, column), (_, ch)| {
            if ch == '\n' {
                (line + 1, 1) // newline: advance line, reset column
            } else {
                (line, column + 1)
            }
        })
}
340 | ||
#[cfg(test)]
mod tests {
    use super::*;

    // A well-formed config with a simple literal pattern builds successfully.
    #[test]
    fn test_custom_detector_creation() {
        let config = CustomDetectorConfig {
            name: "TEST".to_string(),
            description: "Test detector".to_string(),
            pattern: r"test".to_string(),
            file_extensions: vec!["rs".to_string()],
            case_sensitive: true,
            multiline: false,
            capture_groups: vec![],
            severity: Severity::Low,
            category: DetectorCategory::Testing,
            examples: vec![],
            enabled: true,
        };

        let detector = CustomDetector::new(config);
        assert!(detector.is_ok());
    }

    // A TODO-style pattern matches once and reports a 1-based line number.
    #[test]
    fn test_custom_detector_matching() {
        let config = CustomDetectorConfig {
            name: "TODO_CUSTOM".to_string(),
            description: "Custom TODO detector".to_string(),
            pattern: r"TODO:.*".to_string(),
            file_extensions: vec![],
            case_sensitive: false,
            multiline: false,
            capture_groups: vec![],
            severity: Severity::Low,
            category: DetectorCategory::Documentation,
            examples: vec![],
            enabled: true,
        };

        let detector = CustomDetector::new(config).unwrap();
        let content = "// TODO: implement this\nsome code";
        let matches = detector.detect(content, Path::new("test.rs"));

        assert_eq!(matches.len(), 1);
        assert_eq!(matches[0].line_number, 1);
    }

    // Manager starts empty; create_examples populates it.
    #[test]
    fn test_detector_manager() {
        let mut manager = CustomDetectorManager::new();
        assert_eq!(manager.list_detectors().len(), 0);

        manager.create_examples().unwrap();
        assert!(!manager.list_detectors().is_empty());

        let detectors = manager.get_detectors();
        assert!(!detectors.is_empty());
    }

    // Empty pattern is accepted by the regex engine (matches empty string).
    #[test]
    fn test_empty_pattern() {
        let config = CustomDetectorConfig {
            name: "EMPTY".to_string(),
            description: "Empty pattern test".to_string(),
            pattern: "".to_string(),
            file_extensions: vec![],
            case_sensitive: true,
            multiline: false,
            capture_groups: vec![],
            severity: Severity::Low,
            category: DetectorCategory::Testing,
            examples: vec![],
            enabled: true,
        };

        let detector = CustomDetector::new(config);
        // Empty pattern is actually valid in regex (matches empty string)
        assert!(detector.is_ok());
    }

    // Word boundaries and escaped braces work as in plain regex.
    #[test]
    fn test_complex_regex() {
        let config = CustomDetectorConfig {
            name: "COMPLEX".to_string(),
            description: "Complex regex with word boundaries".to_string(),
            pattern: r"\bclass\s+\w+\s+extends\s+\w+\s*\{".to_string(),
            file_extensions: vec!["js".to_string()],
            case_sensitive: true,
            multiline: false,
            capture_groups: vec![],
            severity: Severity::Medium,
            category: DetectorCategory::CodeQuality,
            examples: vec![],
            enabled: true,
        };

        let detector = CustomDetector::new(config).unwrap();
        let content = "class MyClass extends Base {\n constructor() {}\n}";
        let matches = detector.detect(content, Path::new("test.js"));
        assert_eq!(matches.len(), 1);
    }

    // ~20k-line input: one hit, with the correct line number deep in the file.
    #[test]
    fn test_large_content() {
        let config = CustomDetectorConfig {
            name: "LARGE_TEST".to_string(),
            description: "Test with large content".to_string(),
            pattern: r"TODO".to_string(),
            file_extensions: vec![],
            case_sensitive: false,
            multiline: false,
            capture_groups: vec![],
            severity: Severity::Low,
            category: DetectorCategory::Testing,
            examples: vec![],
            enabled: true,
        };

        let detector = CustomDetector::new(config).unwrap();
        let large_content = "some code\n".repeat(10000)
            + "// TODO: large file test\n"
            + &"more code\n".repeat(10000);
        let matches = detector.detect(&large_content, Path::new("large.rs"));
        assert_eq!(matches.len(), 1);
        assert_eq!(matches[0].line_number, 10001);
    }

    // multiline: true lets a single pattern span several source lines.
    #[test]
    fn test_multiline_pattern() {
        let config = CustomDetectorConfig {
            name: "MULTILINE".to_string(),
            description: "Multiline pattern test".to_string(),
            pattern: r"function\s+\w+\([^)]*\)\s*\{[^}]*\}".to_string(),
            file_extensions: vec!["js".to_string()],
            case_sensitive: true,
            multiline: true,
            capture_groups: vec![],
            severity: Severity::Low,
            category: DetectorCategory::Testing,
            examples: vec![],
            enabled: true,
        };

        let detector = CustomDetector::new(config).unwrap();
        let content = "function test() {\n return true;\n}\nother code";
        let matches = detector.detect(content, Path::new("test.js"));
        assert_eq!(matches.len(), 1);
    }

    // case_sensitive: false matches both "TODO" and "todo".
    #[test]
    fn test_case_insensitive() {
        let config = CustomDetectorConfig {
            name: "CASE_TEST".to_string(),
            description: "Case insensitive test".to_string(),
            pattern: r"todo".to_string(),
            file_extensions: vec![],
            case_sensitive: false,
            multiline: false,
            capture_groups: vec![],
            severity: Severity::Low,
            category: DetectorCategory::Testing,
            examples: vec![],
            enabled: true,
        };

        let detector = CustomDetector::new(config).unwrap();
        let content = "// TODO: case test\n// todo: another";
        let matches = detector.detect(content, Path::new("test.rs"));
        assert_eq!(matches.len(), 2);
    }

    // A non-empty file_extensions list restricts detection to those extensions.
    #[test]
    fn test_file_extension_filtering() {
        let config = CustomDetectorConfig {
            name: "EXT_TEST".to_string(),
            description: "File extension test".to_string(),
            pattern: r"test".to_string(),
            file_extensions: vec!["rs".to_string()],
            case_sensitive: true,
            multiline: false,
            capture_groups: vec![],
            severity: Severity::Low,
            category: DetectorCategory::Testing,
            examples: vec![],
            enabled: true,
        };

        let detector = CustomDetector::new(config).unwrap();
        let content = "test content";

        // Should match .rs file
        let matches_rs = detector.detect(content, Path::new("test.rs"));
        assert_eq!(matches_rs.len(), 1);

        // Should not match .js file
        let matches_js = detector.detect(content, Path::new("test.js"));
        assert_eq!(matches_js.len(), 0);
    }

    // Named capture groups listed in capture_groups surface in the message
    // as "name=value" pairs.
    #[test]
    fn test_capture_groups() {
        let config = CustomDetectorConfig {
            name: "CAPTURE".to_string(),
            description: "Capture groups test".to_string(),
            pattern: r"let\s+(?P<var>\w+)\s*=\s*(?P<value>\w+);".to_string(),
            file_extensions: vec![],
            case_sensitive: true,
            multiline: false,
            capture_groups: vec!["var".to_string(), "value".to_string()],
            severity: Severity::Low,
            category: DetectorCategory::Testing,
            examples: vec![],
            enabled: true,
        };

        let detector = CustomDetector::new(config).unwrap();
        let content = "let x = 42;";
        let matches = detector.detect(content, Path::new("test.rs"));
        assert_eq!(matches.len(), 1);
        assert!(matches[0].message.contains("var=x"));
        assert!(matches[0].message.contains("value=42"));
    }

    // enabled: false short-circuits detection to zero matches.
    #[test]
    fn test_disabled_detector() {
        let config = CustomDetectorConfig {
            name: "DISABLED".to_string(),
            description: "Disabled detector test".to_string(),
            pattern: r"test".to_string(),
            file_extensions: vec![],
            case_sensitive: true,
            multiline: false,
            capture_groups: vec![],
            severity: Severity::Low,
            category: DetectorCategory::Testing,
            examples: vec![],
            enabled: false,
        };

        let detector = CustomDetector::new(config).unwrap();
        let content = "test content";
        let matches = detector.detect(content, Path::new("test.rs"));
        assert_eq!(matches.len(), 0);
    }

    // An unparseable regex is rejected at construction time, not at scan time.
    #[test]
    fn test_invalid_regex() {
        let config = CustomDetectorConfig {
            name: "INVALID".to_string(),
            description: "Invalid regex test".to_string(),
            pattern: r"[unclosed".to_string(),
            file_extensions: vec![],
            case_sensitive: true,
            multiline: false,
            capture_groups: vec![],
            severity: Severity::Low,
            category: DetectorCategory::Testing,
            examples: vec![],
            enabled: true,
        };

        let detector = CustomDetector::new(config);
        assert!(detector.is_err());
    }
}
Line | Count | Source |
1 | use crate::detectors::*; | |
2 | use crate::enhanced_config::{DetectorType, EnhancedScanConfig}; | |
3 | use crate::llm_detectors::*; | |
4 | use crate::PatternDetector; | |
5 | use anyhow::Result; | |
6 | ||
/// Factory for creating pattern detectors based on configuration
pub struct DetectorFactory;

impl DetectorFactory {
    /// Create all enabled detectors from configuration.
    ///
    /// Iterates `config.enabled_detectors`; a detector that fails to build
    /// (e.g. an invalid custom regex) is skipped with a warning on stderr
    /// rather than aborting the whole set, and `Ok(None)` results (unknown
    /// or unconfigured types) are silently ignored.
    pub fn create_detectors(config: &EnhancedScanConfig) -> Vec<Box<dyn PatternDetector>> {
        let mut detectors = Vec::new();
        for detector_type in &config.enabled_detectors {
            match Self::create_detector(detector_type, Some(config)) {
                Ok(Some(detector)) => detectors.push(detector),
                Ok(None) => {} // Detector type not supported or disabled
                Err(e) => eprintln!(
                    "Warning: Failed to create detector for {:?}: {}",
                    detector_type, e
                ),
            }
        }
        detectors
    }

    /// Create a default set of detectors (backwards compatibility):
    /// just TODO and FIXME.
    pub fn create_default_detectors() -> Vec<Box<dyn PatternDetector>> {
        vec![Box::new(TodoDetector), Box::new(FixmeDetector)]
    }

    /// Create an extended set of detectors for comprehensive scanning.
    /// Covers comment markers, Rust-specific panics/unwraps, performance
    /// patterns, unsafe blocks, dev-phase markers and non-production code.
    pub fn create_comprehensive_detectors() -> Vec<Box<dyn PatternDetector>> {
        vec![
            // Comment patterns
            Box::new(TodoDetector),
            Box::new(FixmeDetector),
            Box::new(HackDetector),
            Box::new(BugDetector),
            Box::new(XxxDetector),
            Box::new(NoteDetector),
            Box::new(WarningDetector),
            // Rust-specific patterns
            Box::new(PanicDetector),
            Box::new(UnwrapDetector),
            Box::new(ExpectDetector),
            Box::new(UnimplementedDetector),
            Box::new(UnreachableDetector),
            // Performance patterns
            Box::new(CloneDetector),
            Box::new(ToStringDetector),
            // Security patterns
            Box::new(UnsafeDetector),
            // Development/Phase patterns
            Box::new(DevDetector),
            Box::new(DebugDetector),
            Box::new(TestDetector),
            Box::new(PhaseDetector),
            Box::new(StagingDetector),
            // Non-production code patterns
            Box::new(ConsoleLogDetector),
            Box::new(PrintDetector),
            Box::new(AlertDetector),
            Box::new(DebuggerDetector),
            Box::new(UnusedVarDetector),
            Box::new(DeadCodeDetector),
            Box::new(ExperimentalDetector),
        ]
    }

    /// Create detectors specifically for finding non-production code
    /// (dev/debug markers, console/print/debugger statements, plus critical
    /// panic/unwrap/unsafe patterns that shouldn't ship).
    pub fn create_production_ready_detectors() -> Vec<Box<dyn PatternDetector>> {
        vec![
            // Development/Phase patterns
            Box::new(DevDetector),
            Box::new(DebugDetector),
            Box::new(TestDetector),
            Box::new(PhaseDetector),
            Box::new(StagingDetector),
            // Non-production code patterns
            Box::new(ConsoleLogDetector),
            Box::new(PrintDetector),
            Box::new(AlertDetector),
            Box::new(DebuggerDetector),
            Box::new(UnusedVarDetector),
            Box::new(DeadCodeDetector),
            Box::new(ExperimentalDetector),
            // Critical issues that shouldn't be in production
            Box::new(PanicDetector),
            Box::new(UnwrapDetector),
            Box::new(UnsafeDetector),
        ]
    }

    /// Create security-focused detectors (unsafe blocks and panic-prone calls).
    pub fn create_security_detectors() -> Vec<Box<dyn PatternDetector>> {
        vec![
            Box::new(UnsafeDetector),
            Box::new(PanicDetector),
            Box::new(UnwrapDetector),
            Box::new(ExpectDetector),
        ]
    }

    /// Create LLM-specific vulnerability detectors (patterns typical of
    /// AI-generated code: hallucinated APIs, injections, weak crypto, etc.).
    pub fn create_llm_security_detectors() -> Vec<Box<dyn PatternDetector>> {
        vec![
            Box::new(HallucinatedApiDetector),
            Box::new(LLMSQLInjectionDetector),
            Box::new(InsecureRandomDetector),
            Box::new(HardcodedCredentialsDetector),
            Box::new(RustMemorySafetyDetector),
            Box::new(CryptoAntipatternDetector),
            Box::new(XSSInjectionDetector),
            Box::new(FilesystemSecurityDetector),
            Box::new(ContextConfusionDetector),
        ]
    }

    /// Create comprehensive LLM detectors (all LLM-related patterns bundled
    /// into a single composite detector).
    pub fn create_llm_comprehensive_detectors() -> Vec<Box<dyn PatternDetector>> {
        vec![Box::new(ComprehensiveLLMDetector::new())]
    }

    /// Create LLM performance and quality detectors.
    pub fn create_llm_quality_detectors() -> Vec<Box<dyn PatternDetector>> {
        vec![
            Box::new(AsyncAntipatternDetector),
            Box::new(PerformanceAntipatternDetector),
            Box::new(ErrorHandlingDetector),
            Box::new(OverengineeringDetector),
            Box::new(ConfigAntipatternDetector),
            Box::new(DatabaseAntipatternDetector),
            Box::new(JSLLMIssuesDetector),
            Box::new(PythonLLMIssuesDetector),
        ]
    }

    /// Create detectors for production-ready scanning including LLM issues:
    /// the production-ready set plus LLM security + quality detectors and
    /// the generated-comments detector.
    pub fn create_production_ready_with_llm_detectors() -> Vec<Box<dyn PatternDetector>> {
        let mut detectors = Self::create_production_ready_detectors();
        detectors.extend(Self::create_llm_security_detectors());
        detectors.extend(Self::create_llm_quality_detectors());
        detectors.push(Box::new(LLMGeneratedCommentsDetector));
        detectors
    }

    /// Create performance-focused detectors.
    pub fn create_performance_detectors() -> Vec<Box<dyn PatternDetector>> {
        vec![
            Box::new(CloneDetector),
            Box::new(ToStringDetector),
            Box::new(UnwrapDetector), // Can cause performance issues
        ]
    }

    /// Create a single detector by type.
    ///
    /// Returns `Ok(None)` when the type is a `Custom` variant whose pattern
    /// is missing from `config` (or no config was supplied); returns `Err`
    /// only when construction itself fails (e.g. invalid custom regex).
    fn create_detector(
        detector_type: &DetectorType,
        config: Option<&EnhancedScanConfig>,
    ) -> Result<Option<Box<dyn PatternDetector>>> {
        match detector_type {
            DetectorType::Todo => Ok(Some(Box::new(TodoDetector))),
            DetectorType::Fixme => Ok(Some(Box::new(FixmeDetector))),
            DetectorType::Hack => Ok(Some(Box::new(HackDetector))),
            DetectorType::Bug => Ok(Some(Box::new(BugDetector))),
            DetectorType::Xxx => Ok(Some(Box::new(XxxDetector))),
            DetectorType::Note => Ok(Some(Box::new(NoteDetector))),
            DetectorType::Warning => Ok(Some(Box::new(WarningDetector))),
            DetectorType::Panic => Ok(Some(Box::new(PanicDetector))),
            DetectorType::Unwrap => Ok(Some(Box::new(UnwrapDetector))),
            DetectorType::Expect => Ok(Some(Box::new(ExpectDetector))),
            DetectorType::Unimplemented => Ok(Some(Box::new(UnimplementedDetector))),
            DetectorType::Unreachable => Ok(Some(Box::new(UnreachableDetector))),
            DetectorType::Clone => Ok(Some(Box::new(CloneDetector))),
            DetectorType::ToString => Ok(Some(Box::new(ToStringDetector))),
            DetectorType::Unsafe => Ok(Some(Box::new(UnsafeDetector))),

            // Development/Phase patterns
            DetectorType::Dev => Ok(Some(Box::new(DevDetector))),
            DetectorType::Debug => Ok(Some(Box::new(DebugDetector))),
            DetectorType::Test => Ok(Some(Box::new(TestDetector))),
            DetectorType::Phase => Ok(Some(Box::new(PhaseDetector))),
            DetectorType::Staging => Ok(Some(Box::new(StagingDetector))),

            // Non-production code patterns
            DetectorType::ConsoleLog => Ok(Some(Box::new(ConsoleLogDetector))),
            DetectorType::Print => Ok(Some(Box::new(PrintDetector))),
            DetectorType::Alert => Ok(Some(Box::new(AlertDetector))),
            DetectorType::Debugger => Ok(Some(Box::new(DebuggerDetector))),
            DetectorType::UnusedVar => Ok(Some(Box::new(UnusedVarDetector))),
            DetectorType::DeadCode => Ok(Some(Box::new(DeadCodeDetector))),
            DetectorType::Experimental => Ok(Some(Box::new(ExperimentalDetector))),

            // LLM-specific security patterns
            DetectorType::LLMHallucinatedApi => Ok(Some(Box::new(HallucinatedApiDetector))),
            DetectorType::LLMSQLInjection => Ok(Some(Box::new(LLMSQLInjectionDetector))),
            DetectorType::LLMInsecureRandom => Ok(Some(Box::new(InsecureRandomDetector))),
            DetectorType::LLMHardcodedCredentials => {
                Ok(Some(Box::new(HardcodedCredentialsDetector)))
            }
            DetectorType::LLMRustMemorySafety => Ok(Some(Box::new(RustMemorySafetyDetector))),
            DetectorType::LLMCryptoAntipattern => Ok(Some(Box::new(CryptoAntipatternDetector))),
            DetectorType::LLMXSSInjection => Ok(Some(Box::new(XSSInjectionDetector))),
            DetectorType::LLMFilesystemSecurity => Ok(Some(Box::new(FilesystemSecurityDetector))),
            DetectorType::LLMContextConfusion => Ok(Some(Box::new(ContextConfusionDetector))),

            // LLM-specific quality patterns
            DetectorType::LLMAsyncAntipattern => Ok(Some(Box::new(AsyncAntipatternDetector))),
            DetectorType::LLMPerformanceIssue => Ok(Some(Box::new(PerformanceAntipatternDetector))),
            DetectorType::LLMErrorHandling => Ok(Some(Box::new(ErrorHandlingDetector))),
            DetectorType::LLMOverengineering => Ok(Some(Box::new(OverengineeringDetector))),
            DetectorType::LLMConfigAntipattern => Ok(Some(Box::new(ConfigAntipatternDetector))),
            DetectorType::LLMDatabaseAntipattern => Ok(Some(Box::new(DatabaseAntipatternDetector))),
            DetectorType::LLMJSIssues => Ok(Some(Box::new(JSLLMIssuesDetector))),
            DetectorType::LLMPythonIssues => Ok(Some(Box::new(PythonLLMIssuesDetector))),
            DetectorType::LLMGeneratedComments => Ok(Some(Box::new(LLMGeneratedCommentsDetector))),

            // Advanced LLM-specific patterns
            DetectorType::LLMAIModelHallucination => Ok(Some(Box::new(AIModelHallucinationDetector))),
            DetectorType::LLMIncorrectAsync => Ok(Some(Box::new(IncorrectAsyncDetector))),
            DetectorType::LLMSecurityAntipattern => Ok(Some(Box::new(LLMSecurityAntipatternDetector))),
            DetectorType::LLMDBAntipattern => Ok(Some(Box::new(LLMDBAntipatternDetector))),
            DetectorType::LLMErrorHandlingMistake => Ok(Some(Box::new(LLMErrorHandlingMistakesDetector))),
            DetectorType::LLMPerformanceMistake => Ok(Some(Box::new(LLMPerformanceMistakesDetector))),
            DetectorType::LLMTypeMistake => Ok(Some(Box::new(LLMTypeMistakesDetector))),

            // Comprehensive LLM detector
            DetectorType::LLMComprehensive => Ok(Some(Box::new(ComprehensiveLLMDetector::new()))),

            DetectorType::Custom(name) => {
                if let Some(config) = config {
                    if let Some(pattern) = config.custom_patterns.get(name) {
                        let detector = CustomPatternDetector::new(name, pattern)?;
                        Ok(Some(Box::new(detector)))
                    } else {
                        Ok(None) // Pattern not found in config
                    }
                } else {
                    Ok(None) // No config provided
                }
            }
        }
    }
}
246 | ||
/// Predefined detector profiles for common use cases.
///
/// Each variant maps to one of the `DetectorFactory::create_*` sets; see
/// [`DetectorProfile::get_detectors`] for the mapping.
pub enum DetectorProfile {
    /// Basic TODO/FIXME detection
    Basic,
    /// All available detectors
    Comprehensive,
    /// Security-focused scanning
    Security,
    /// Performance-focused scanning
    Performance,
    /// Rust-specific patterns only
    Rust,
    /// Production-readiness scanning (finds non-production code)
    ProductionReady,
    /// LLM security vulnerabilities only
    LLMSecurity,
    /// LLM quality issues only
    LLMQuality,
    /// All LLM-related patterns
    LLMComprehensive,
    /// Production-ready with LLM detection
    ProductionReadyWithLLM,
    /// Custom configuration (boxed: EnhancedScanConfig is large)
    Custom(Box<EnhancedScanConfig>),
}
272 | ||
273 | impl DetectorProfile { | |
274 | /// Get detectors for the specified profile | |
275 | 52 | pub fn get_detectors(&self) -> Vec<Box<dyn PatternDetector>> { |
276 | 52 | match self { |
277 | 28 | DetectorProfile::Basic => DetectorFactory::create_default_detectors(), |
278 | 9 | DetectorProfile::Comprehensive => DetectorFactory::create_comprehensive_detectors(), |
279 | 5 | DetectorProfile::Security => DetectorFactory::create_security_detectors(), |
280 | 6 | DetectorProfile::Performance => DetectorFactory::create_performance_detectors(), |
281 | DetectorProfile::ProductionReady => { | |
282 | 0 | DetectorFactory::create_production_ready_detectors() |
283 | } | |
284 | 0 | DetectorProfile::LLMSecurity => DetectorFactory::create_llm_security_detectors(), |
285 | 0 | DetectorProfile::LLMQuality => DetectorFactory::create_llm_quality_detectors(), |
286 | DetectorProfile::LLMComprehensive => { | |
287 | 0 | DetectorFactory::create_llm_comprehensive_detectors() |
288 | } | |
289 | DetectorProfile::ProductionReadyWithLLM => { | |
290 | 0 | DetectorFactory::create_production_ready_with_llm_detectors() |
291 | } | |
292 | 4 | DetectorProfile::Rust => vec![ |
293 | 4 | Box::new(PanicDetector), |
294 | 4 | Box::new(UnwrapDetector), |
295 | 4 | Box::new(ExpectDetector), |
296 | 4 | Box::new(UnimplementedDetector), |
297 | 4 | Box::new(UnreachableDetector), |
298 | 4 | Box::new(CloneDetector), |
299 | 4 | Box::new(ToStringDetector), |
300 | 4 | Box::new(UnsafeDetector), |
301 | ], | |
302 | 0 | DetectorProfile::Custom(config) => DetectorFactory::create_detectors(config), |
303 | } | |
304 | 52 | } |
305 | } | |
306 | ||
#[cfg(test)]
mod tests {
    use super::*;

    // Default set is exactly TODO + FIXME.
    #[test]
    fn test_default_detectors() {
        let detectors = DetectorFactory::create_default_detectors();
        assert_eq!(detectors.len(), 2);
    }

    // Comprehensive set is substantially larger than the default.
    #[test]
    fn test_comprehensive_detectors() {
        let detectors = DetectorFactory::create_comprehensive_detectors();
        assert!(detectors.len() > 10);
    }

    // Security set has at least unsafe/panic/unwrap/expect.
    #[test]
    fn test_security_detectors() {
        let detectors = DetectorFactory::create_security_detectors();
        assert!(detectors.len() >= 4);
    }

    // Profiles are consistent: Comprehensive strictly supersets Basic.
    #[test]
    fn test_detector_profiles() {
        let basic = DetectorProfile::Basic.get_detectors();
        let comprehensive = DetectorProfile::Comprehensive.get_detectors();

        assert!(comprehensive.len() > basic.len());
    }

    // A custom pattern added to the config yields an extra detector on top
    // of the defaults.
    #[test]
    fn test_factory_with_custom_detectors() {
        let mut config = EnhancedScanConfig::default();
        config
            .custom_patterns
            .insert("MY_PATTERN".to_string(), r"custom".to_string());
        config
            .enabled_detectors
            .push(DetectorType::Custom("MY_PATTERN".to_string()));

        let detectors = DetectorFactory::create_detectors(&config);
        assert!(!detectors.is_empty());
        // The default config has 2 detectors, plus our custom one
        assert!(detectors.len() >= 3);
    }

    // Custom type with a registered pattern builds successfully.
    #[test]
    fn test_custom_detector_creation_success() {
        let mut config = EnhancedScanConfig::default();
        config
            .custom_patterns
            .insert("TEST".to_string(), r"test".to_string());

        let result = DetectorFactory::create_detector(
            &DetectorType::Custom("TEST".to_string()),
            Some(&config),
        );
        assert!(result.is_ok());
        assert!(result.unwrap().is_some());
    }

    // Custom type without a registered pattern yields Ok(None), not an error.
    #[test]
    fn test_custom_detector_creation_missing_pattern() {
        let config = EnhancedScanConfig::default();

        let result = DetectorFactory::create_detector(
            &DetectorType::Custom("MISSING".to_string()),
            Some(&config),
        );
        assert!(result.is_ok());
        assert!(result.unwrap().is_none());
    }

    // Custom type with no config at all also yields Ok(None).
    #[test]
    fn test_custom_detector_creation_no_config() {
        let result =
            DetectorFactory::create_detector(&DetectorType::Custom("TEST".to_string()), None);
        assert!(result.is_ok());
        assert!(result.unwrap().is_none());
    }

    // An invalid custom regex is dropped with a warning; the remaining
    // (default) detectors are still returned.
    #[test]
    fn test_custom_detector_invalid_regex() {
        let mut config = EnhancedScanConfig::default();
        config
            .custom_patterns
            .insert("INVALID".to_string(), r"[invalid".to_string());
        config
            .enabled_detectors
            .push(DetectorType::Custom("INVALID".to_string()));

        let detectors = DetectorFactory::create_detectors(&config);
        // Should have default detectors but not the invalid custom one
        assert_eq!(detectors.len(), 2); // default has 2
    }
}
Line | Count | Source |
1 | use crate::{Match, PatternDetector}; | |
2 | use aho_corasick::AhoCorasick; | |
3 | use anyhow::Result; | |
4 | use lazy_static::lazy_static; | |
5 | use regex::Regex; | |
6 | use smallvec::SmallVec; | |
7 | use std::path::Path; | |
8 | ||
// Pre-compiled, process-wide regexes shared by all detectors (compiling a
// Regex per call would dominate scan time). `(?i)` inline flags make the
// keyword patterns case-insensitive; XXX is deliberately matched
// case-sensitively (upper-case only).
lazy_static! {
    pub static ref TODO_REGEX: Regex = Regex::new(r"\b(?i)todo\b").unwrap();
    pub static ref FIXME_REGEX: Regex = Regex::new(r"\b(?i)fixme\b").unwrap();
    pub static ref HACK_REGEX: Regex = Regex::new(r"\b(?i)hack\b").unwrap();
    pub static ref BUG_REGEX: Regex = Regex::new(r"\b(?i)bug\b").unwrap();
    pub static ref XXX_REGEX: Regex = Regex::new(r"\bXXX\b").unwrap();
    pub static ref NOTE_REGEX: Regex = Regex::new(r"\b(?i)note\b").unwrap();
    pub static ref WARNING_REGEX: Regex = Regex::new(r"\b(?i)warning\b").unwrap();
    // Rust-specific patterns
    pub static ref PANIC_REGEX: Regex = Regex::new(r"\bpanic!\s*\(").unwrap();
    pub static ref UNWRAP_REGEX: Regex = Regex::new(r"\.unwrap\s*\(\s*\)").unwrap();
    pub static ref EXPECT_REGEX: Regex = Regex::new(r"\.expect\s*\(").unwrap();
    pub static ref UNIMPLEMENTED_REGEX: Regex = Regex::new(r"\bunimplemented!\s*\(").unwrap();
    pub static ref UNREACHABLE_REGEX: Regex = Regex::new(r"\bunreachable!\s*\(").unwrap();
    // Performance patterns
    pub static ref CLONE_REGEX: Regex = Regex::new(r"\.clone\s*\(\s*\)").unwrap();
    pub static ref TO_STRING_REGEX: Regex = Regex::new(r"\.to_string\s*\(\s*\)").unwrap();
    // Security patterns
    pub static ref UNSAFE_REGEX: Regex = Regex::new(r"\bunsafe\s+\{").unwrap();

    // Development/Phase patterns
    pub static ref DEV_REGEX: Regex = Regex::new(r"\b(?i)(dev|development)\b").unwrap();
    pub static ref DEBUG_REGEX: Regex = Regex::new(r"\b(?i)debug\b").unwrap();
    pub static ref TEST_REGEX: Regex = Regex::new(r"\b(?i)(test|testing)\b").unwrap();
    pub static ref PHASE_REGEX: Regex = Regex::new(r"\b(?i)phase\s*[0-9]+\b").unwrap();
    pub static ref STAGING_REGEX: Regex = Regex::new(r"\b(?i)staging\b").unwrap();

    // Non-production code patterns
    // NOTE(review): PRINT_REGEX also matches `console.log(` — overlaps with
    // CONSOLE_LOG_REGEX, so files using both detectors report such lines twice.
    pub static ref CONSOLE_LOG_REGEX: Regex = Regex::new(r"console\.(log|debug|info|warn|error)\s*\(").unwrap();
    pub static ref PRINT_REGEX: Regex = Regex::new(r"\b(print|printf|println!?|var_dump)\s*\(|console\.log\s*\(|\becho\s+").unwrap();
    pub static ref ALERT_REGEX: Regex = Regex::new(r"\b(alert|confirm|prompt)\s*\(").unwrap();
    pub static ref DEBUGGER_REGEX: Regex = Regex::new(r"\b(debugger|pdb\.set_trace|breakpoint|__debugbreak)\b").unwrap();
    pub static ref UNUSED_VAR_REGEX: Regex = Regex::new(r"\b(let|var|const)\s+(\w+)\s*[=;].*?\/\/\s*(?i)(unused|not\s+used)").unwrap();
    pub static ref DEAD_CODE_REGEX: Regex = Regex::new(r"\/\/\s*(?i)(dead\s*code|unreachable|never\s+called)").unwrap();
    pub static ref EXPERIMENTAL_REGEX: Regex = Regex::new(r"\b(?i)(experimental|prototype|poc|proof[\s-]of[\s-]concept)\b").unwrap();
}
45 | ||
46 | 453 | fn detect_pattern_with_context( |
47 | 453 | content: &str, |
48 | 453 | file_path: &Path, |
49 | 453 | pattern_name: &str, |
50 | 453 | re: &Regex, |
51 | 453 | ) -> Vec<Match> { |
52 | 453 | let mut matches = smallvec::SmallVec::<[Match; 4]>::new(); |
53 | 8.82k | for (line_idx, line) in |
54 | 8.82k | for |
55 | 130 | // Extract more context around the match |
56 | 130 | let context_start = mat.start().saturating_sub(10); |
57 | 130 | let context_end = (mat.end() + 20).min(line.len()); |
58 | 130 | let context = &line[context_start..context_end]; |
59 | 130 | |
60 | 130 | matches.push(Match { |
61 | 130 | file_path: file_path.to_string_lossy().to_string(), |
62 | 130 | line_number: line_idx + 1, |
63 | 130 | column: mat.start() + 1, |
64 | 130 | pattern: pattern_name.to_string(), |
65 | 130 | message: format!("{}: {}", pattern_name, context.trim()), |
66 | 130 | }); |
67 | 130 | } |
68 | } | |
69 | 453 | matches.into_vec() |
70 | 453 | } |
71 | ||
72 | /// Default detector for TODO comments (case-insensitive) | |
73 | pub struct TodoDetector; | |
74 | ||
75 | impl PatternDetector for TodoDetector { | |
76 | 38 | fn detect(&self, content: &str, file_path: &Path) -> Vec<Match> { |
77 | 38 | detect_pattern_with_context(content, file_path, "TODO", &TODO_REGEX) |
78 | 38 | } |
79 | } | |
80 | ||
81 | /// Default detector for FIXME comments (case-insensitive) | |
82 | pub struct FixmeDetector; | |
83 | ||
84 | impl PatternDetector for FixmeDetector { | |
85 | 38 | fn detect(&self, content: &str, file_path: &Path) -> Vec<Match> { |
86 | 38 | detect_pattern_with_context(content, file_path, "FIXME", &FIXME_REGEX) |
87 | 38 | } |
88 | } | |
89 | ||
90 | /// Detector for HACK comments indicating temporary workarounds | |
91 | pub struct HackDetector; | |
92 | ||
93 | impl PatternDetector for HackDetector { | |
94 | 16 | fn detect(&self, content: &str, file_path: &Path) -> Vec<Match> { |
95 | 16 | detect_pattern_with_context(content, file_path, "HACK", &HACK_REGEX) |
96 | 16 | } |
97 | } | |
98 | ||
99 | /// Detector for BUG comments indicating known issues | |
100 | pub struct BugDetector; | |
101 | ||
102 | impl PatternDetector for BugDetector { | |
103 | 16 | fn detect(&self, content: &str, file_path: &Path) -> Vec<Match> { |
104 | 16 | detect_pattern_with_context(content, file_path, "BUG", &BUG_REGEX) |
105 | 16 | } |
106 | } | |
107 | ||
108 | /// Detector for XXX comments indicating urgent attention needed | |
109 | pub struct XxxDetector; | |
110 | ||
111 | impl PatternDetector for XxxDetector { | |
112 | 16 | fn detect(&self, content: &str, file_path: &Path) -> Vec<Match> { |
113 | 16 | detect_pattern_with_context(content, file_path, "XXX", &XXX_REGEX) |
114 | 16 | } |
115 | } | |
116 | ||
117 | /// Detector for NOTE comments | |
118 | pub struct NoteDetector; | |
119 | ||
120 | impl PatternDetector for NoteDetector { | |
121 | 16 | fn detect(&self, content: &str, file_path: &Path) -> Vec<Match> { |
122 | 16 | detect_pattern_with_context(content, file_path, "NOTE", &NOTE_REGEX) |
123 | 16 | } |
124 | } | |
125 | ||
126 | /// Detector for WARNING comments | |
127 | pub struct WarningDetector; | |
128 | ||
129 | impl PatternDetector for WarningDetector { | |
130 | 16 | fn detect(&self, content: &str, file_path: &Path) -> Vec<Match> { |
131 | 16 | detect_pattern_with_context(content, file_path, "WARNING", &WARNING_REGEX) |
132 | 16 | } |
133 | } | |
134 | ||
135 | /// Detector for panic! macros in Rust code | |
136 | pub struct PanicDetector; | |
137 | ||
138 | impl PatternDetector for PanicDetector { | |
139 | 17 | fn detect(&self, content: &str, file_path: &Path) -> Vec<Match> { |
140 | // Only detect in Rust files | |
141 | 17 | if let Some(ext) = file_path.extension() { |
142 | 17 | if ext == "rs" { |
143 | 17 | return detect_pattern_with_context(content, file_path, "PANIC", &PANIC_REGEX); |
144 | 0 | } |
145 | 0 | } |
146 | 0 | Vec::new() |
147 | 17 | } |
148 | } | |
149 | ||
150 | /// Detector for .unwrap() calls in Rust code (potential panic points) | |
151 | pub struct UnwrapDetector; | |
152 | ||
153 | impl PatternDetector for UnwrapDetector { | |
154 | 19 | fn detect(&self, content: &str, file_path: &Path) -> Vec<Match> { |
155 | // Only detect in Rust files | |
156 | 19 | if let Some(ext) = file_path.extension() { |
157 | 19 | if ext == "rs" { |
158 | 19 | return detect_pattern_with_context(content, file_path, "UNWRAP", &UNWRAP_REGEX); |
159 | 0 | } |
160 | 0 | } |
161 | 0 | Vec::new() |
162 | 19 | } |
163 | } | |
164 | ||
165 | /// Detector for .expect() calls in Rust code | |
166 | pub struct ExpectDetector; | |
167 | ||
168 | impl PatternDetector for ExpectDetector { | |
169 | 17 | fn detect(&self, content: &str, file_path: &Path) -> Vec<Match> { |
170 | // Only detect in Rust files | |
171 | 17 | if let Some(ext) = file_path.extension() { |
172 | 17 | if ext == "rs" { |
173 | 17 | return detect_pattern_with_context(content, file_path, "EXPECT", &EXPECT_REGEX); |
174 | 0 | } |
175 | 0 | } |
176 | 0 | Vec::new() |
177 | 17 | } |
178 | } | |
179 | ||
180 | /// Detector for unimplemented! macros in Rust code | |
181 | pub struct UnimplementedDetector; | |
182 | ||
183 | impl PatternDetector for UnimplementedDetector { | |
184 | 16 | fn detect(&self, content: &str, file_path: &Path) -> Vec<Match> { |
185 | // Only detect in Rust files | |
186 | 16 | if let Some(ext) = file_path.extension() { |
187 | 16 | if ext == "rs" { |
188 | 16 | return detect_pattern_with_context( |
189 | 16 | content, |
190 | 16 | file_path, |
191 | 16 | "UNIMPLEMENTED", |
192 | 16 | &UNIMPLEMENTED_REGEX, |
193 | ); | |
194 | 0 | } |
195 | 0 | } |
196 | 0 | Vec::new() |
197 | 16 | } |
198 | } | |
199 | ||
200 | /// Detector for unreachable! macros in Rust code | |
201 | pub struct UnreachableDetector; | |
202 | ||
203 | impl PatternDetector for UnreachableDetector { | |
204 | 16 | fn detect(&self, content: &str, file_path: &Path) -> Vec<Match> { |
205 | // Only detect in Rust files | |
206 | 16 | if let Some(ext) = file_path.extension() { |
207 | 16 | if ext == "rs" { |
208 | 16 | return detect_pattern_with_context( |
209 | 16 | content, |
210 | 16 | file_path, |
211 | 16 | "UNREACHABLE", |
212 | 16 | &UNREACHABLE_REGEX, |
213 | ); | |
214 | 0 | } |
215 | 0 | } |
216 | 0 | Vec::new() |
217 | 16 | } |
218 | } | |
219 | ||
220 | /// Detector for excessive .clone() calls (potential performance issue) | |
221 | pub struct CloneDetector; | |
222 | ||
223 | impl PatternDetector for CloneDetector { | |
224 | 18 | fn detect(&self, content: &str, file_path: &Path) -> Vec<Match> { |
225 | // Only detect in Rust files | |
226 | 18 | if let Some(ext) = file_path.extension() { |
227 | 18 | if ext == "rs" { |
228 | 18 | return detect_pattern_with_context(content, file_path, "CLONE", &CLONE_REGEX); |
229 | 0 | } |
230 | 0 | } |
231 | 0 | Vec::new() |
232 | 18 | } |
233 | } | |
234 | ||
235 | /// Detector for .to_string() calls (potential performance issue) | |
236 | pub struct ToStringDetector; | |
237 | ||
238 | impl PatternDetector for ToStringDetector { | |
239 | 18 | fn detect(&self, content: &str, file_path: &Path) -> Vec<Match> { |
240 | // Only detect in Rust files | |
241 | 18 | if let Some(ext) = file_path.extension() { |
242 | 18 | if ext == "rs" { |
243 | 18 | return detect_pattern_with_context( |
244 | 18 | content, |
245 | 18 | file_path, |
246 | 18 | "TO_STRING", |
247 | 18 | &TO_STRING_REGEX, |
248 | ); | |
249 | 0 | } |
250 | 0 | } |
251 | 0 | Vec::new() |
252 | 18 | } |
253 | } | |
254 | ||
255 | /// Detector for unsafe blocks in Rust code (security concern) | |
256 | pub struct UnsafeDetector; | |
257 | ||
258 | impl PatternDetector for UnsafeDetector { | |
259 | 17 | fn detect(&self, content: &str, file_path: &Path) -> Vec<Match> { |
260 | // Only detect in Rust files | |
261 | 17 | if let Some(ext) = file_path.extension() { |
262 | 17 | if ext == "rs" { |
263 | 17 | return detect_pattern_with_context(content, file_path, "UNSAFE", &UNSAFE_REGEX); |
264 | 0 | } |
265 | 0 | } |
266 | 0 | Vec::new() |
267 | 17 | } |
268 | } | |
269 | ||
270 | /// Detector for development/dev environment references | |
271 | pub struct DevDetector; | |
272 | ||
273 | impl PatternDetector for DevDetector { | |
274 | 16 | fn detect(&self, content: &str, file_path: &Path) -> Vec<Match> { |
275 | 16 | detect_pattern_with_context(content, file_path, "DEV", &DEV_REGEX) |
276 | 16 | } |
277 | } | |
278 | ||
279 | /// Detector for debug-related code | |
280 | pub struct DebugDetector; | |
281 | ||
282 | impl PatternDetector for DebugDetector { | |
283 | 16 | fn detect(&self, content: &str, file_path: &Path) -> Vec<Match> { |
284 | 16 | detect_pattern_with_context(content, file_path, "DEBUG", &DEBUG_REGEX) |
285 | 16 | } |
286 | } | |
287 | ||
288 | /// Detector for test-related code in production files | |
289 | pub struct TestDetector; | |
290 | ||
291 | impl PatternDetector for TestDetector { | |
292 | 16 | fn detect(&self, content: &str, file_path: &Path) -> Vec<Match> { |
293 | // Skip actual test files | |
294 | 16 | if let Some(path_str) = file_path.to_str() { |
295 | 16 | if path_str.contains("test") || |
296 | 1 | return Vec::new(); |
297 | 15 | } |
298 | 0 | } |
299 | 15 | detect_pattern_with_context(content, file_path, "TEST", &TEST_REGEX) |
300 | 16 | } |
301 | } | |
302 | ||
303 | /// Detector for phase markers in code | |
304 | pub struct PhaseDetector; | |
305 | ||
306 | impl PatternDetector for PhaseDetector { | |
307 | 16 | fn detect(&self, content: &str, file_path: &Path) -> Vec<Match> { |
308 | 16 | detect_pattern_with_context(content, file_path, "PHASE", &PHASE_REGEX) |
309 | 16 | } |
310 | } | |
311 | ||
312 | /// Detector for staging environment references | |
313 | pub struct StagingDetector; | |
314 | ||
315 | impl PatternDetector for StagingDetector { | |
316 | 16 | fn detect(&self, content: &str, file_path: &Path) -> Vec<Match> { |
317 | 16 | detect_pattern_with_context(content, file_path, "STAGING", &STAGING_REGEX) |
318 | 16 | } |
319 | } | |
320 | ||
321 | /// Detector for console.log statements (JavaScript/TypeScript) | |
322 | pub struct ConsoleLogDetector; | |
323 | ||
324 | impl PatternDetector for ConsoleLogDetector { | |
325 | 16 | fn detect(&self, content: &str, file_path: &Path) -> Vec<Match> { |
326 | // Detect in JavaScript/TypeScript files | |
327 | 16 | if let Some(ext) = file_path.extension() { |
328 | 16 | let ext_str = ext.to_string_lossy(); |
329 | 0 | if matches!( |
330 | 16 | ext_str.as_ref(), |
331 | 16 | "js" | "ts" | "jsx" | "tsx" | "vue" | "svelte" |
332 | ) { | |
333 | 0 | return detect_pattern_with_context( |
334 | 0 | content, |
335 | 0 | file_path, |
336 | 0 | "CONSOLE_LOG", |
337 | 0 | &CONSOLE_LOG_REGEX, |
338 | ); | |
339 | 16 | } |
340 | 0 | } |
341 | 16 | Vec::new() |
342 | 16 | } |
343 | } | |
344 | ||
345 | /// Detector for print statements in various languages | |
346 | pub struct PrintDetector; | |
347 | ||
348 | impl PatternDetector for PrintDetector { | |
349 | 16 | fn detect(&self, content: &str, file_path: &Path) -> Vec<Match> { |
350 | 16 | detect_pattern_with_context(content, file_path, "PRINT", &PRINT_REGEX) |
351 | 16 | } |
352 | } | |
353 | ||
354 | /// Detector for alert/prompt statements (JavaScript) | |
355 | pub struct AlertDetector; | |
356 | ||
357 | impl PatternDetector for AlertDetector { | |
358 | 16 | fn detect(&self, content: &str, file_path: &Path) -> Vec<Match> { |
359 | // Detect in JavaScript/TypeScript files | |
360 | 16 | if let Some(ext) = file_path.extension() { |
361 | 16 | let ext_str = ext.to_string_lossy(); |
362 | 0 | if matches!( |
363 | 16 | ext_str.as_ref(), |
364 | 16 | "js" | "ts" | "jsx" | "tsx" | "html" | "vue" | "svelte" |
365 | ) { | |
366 | 0 | return detect_pattern_with_context(content, file_path, "ALERT", &ALERT_REGEX); |
367 | 16 | } |
368 | 0 | } |
369 | 16 | Vec::new() |
370 | 16 | } |
371 | } | |
372 | ||
373 | /// Detector for debugger statements and breakpoints | |
374 | pub struct DebuggerDetector; | |
375 | ||
376 | impl PatternDetector for DebuggerDetector { | |
377 | 16 | fn detect(&self, content: &str, file_path: &Path) -> Vec<Match> { |
378 | 16 | detect_pattern_with_context(content, file_path, "DEBUGGER", &DEBUGGER_REGEX) |
379 | 16 | } |
380 | } | |
381 | ||
382 | /// Detector for explicitly marked unused variables | |
383 | pub struct UnusedVarDetector; | |
384 | ||
385 | impl PatternDetector for UnusedVarDetector { | |
386 | 16 | fn detect(&self, content: &str, file_path: &Path) -> Vec<Match> { |
387 | 16 | detect_pattern_with_context(content, file_path, "UNUSED_VAR", &UNUSED_VAR_REGEX) |
388 | 16 | } |
389 | } | |
390 | ||
391 | /// Detector for dead code comments | |
392 | pub struct DeadCodeDetector; | |
393 | ||
394 | impl PatternDetector for DeadCodeDetector { | |
395 | 16 | fn detect(&self, content: &str, file_path: &Path) -> Vec<Match> { |
396 | 16 | detect_pattern_with_context(content, file_path, "DEAD_CODE", &DEAD_CODE_REGEX) |
397 | 16 | } |
398 | } | |
399 | ||
400 | /// Detector for experimental/prototype code | |
401 | pub struct ExperimentalDetector; | |
402 | ||
403 | impl PatternDetector for ExperimentalDetector { | |
404 | 16 | fn detect(&self, content: &str, file_path: &Path) -> Vec<Match> { |
405 | 16 | detect_pattern_with_context(content, file_path, "EXPERIMENTAL", &EXPERIMENTAL_REGEX) |
406 | 16 | } |
407 | } | |
408 | ||
409 | /// Custom pattern detector that uses user-defined regex patterns | |
410 | pub struct CustomPatternDetector { | |
411 | name: String, | |
412 | regex: Regex, | |
413 | } | |
414 | ||
415 | impl CustomPatternDetector { | |
416 | /// Creates a new custom pattern detector with the given name and regex pattern | |
417 | 0 | pub fn new(name: &str, pattern: &str) -> Result<Self> { |
418 | 0 | let regex = Regex::new(pattern)?; |
419 | 0 | Ok(Self { |
420 | 0 | name: name.to_string(), |
421 | 0 | regex, |
422 | 0 | }) |
423 | 0 | } |
424 | } | |
425 | ||
426 | impl PatternDetector for CustomPatternDetector { | |
427 | 0 | fn detect(&self, content: &str, file_path: &Path) -> Vec<Match> { |
428 | 0 | detect_pattern_with_context(content, file_path, &self.name, &self.regex) |
429 | 0 | } |
430 | } | |
431 | ||
432 | /// High-performance detector using Aho-Corasick algorithm for multiple pattern matching | |
433 | pub struct HighPerformanceDetector { | |
434 | patterns: Vec<String>, | |
435 | pattern_names: Vec<String>, | |
436 | ac: AhoCorasick, | |
437 | } | |
438 | ||
439 | impl HighPerformanceDetector { | |
440 | /// Creates a new high-performance detector with the given patterns | |
441 | 0 | pub fn new(patterns: Vec<(&str, &str)>) -> Result<Self> { |
442 | 0 | let (pattern_names, pattern_strings): (Vec<String>, Vec<String>) = patterns |
443 | 0 | .into_iter() |
444 | 0 | .map(|(name, pattern)| (name.to_string(), pattern.to_string())) |
445 | 0 | .unzip(); |
446 | ||
447 | 0 | let ac = AhoCorasick::new(&pattern_strings)?; |
448 | ||
449 | 0 | Ok(Self { |
450 | 0 | patterns: pattern_strings, |
451 | 0 | pattern_names, |
452 | 0 | ac, |
453 | 0 | }) |
454 | 0 | } |
455 | ||
456 | /// Creates a detector for common TODO/FIXME patterns | |
457 | 0 | pub fn for_common_patterns() -> Self { |
458 | 0 | let patterns = vec![ |
459 | 0 | ("TODO", r"(?i)todo"), |
460 | 0 | ("FIXME", r"(?i)fixme"), |
461 | 0 | ("HACK", r"(?i)hack"), |
462 | 0 | ("BUG", r"(?i)bug"), |
463 | 0 | ("XXX", r"XXX"), |
464 | 0 | ("NOTE", r"(?i)note"), |
465 | 0 | ("WARNING", r"(?i)warning"), |
466 | 0 | ("PANIC", r"panic!"), |
467 | 0 | ("UNWRAP", r"\.unwrap\(\)"), |
468 | 0 | ("UNSAFE", r"unsafe\s+\{"), |
469 | 0 | ("DEBUG", r"(?i)debug"), |
470 | 0 | ("TEST", r"(?i)test"), |
471 | 0 | ("PHASE", r"(?i)phase\s*[0-9]+"), |
472 | 0 | ("CONSOLE_LOG", r"console\.(log|debug|info|warn|error)"), |
473 | 0 | ("PRINT", r"print|println|echo"), |
474 | 0 | ("ALERT", r"alert\(|confirm\(|prompt\("), |
475 | 0 | ("DEBUGGER", r"debugger|pdb\.set_trace"), |
476 | ]; | |
477 | ||
478 | 0 | Self::new(patterns).unwrap() |
479 | 0 | } |
480 | } | |
481 | ||
482 | impl PatternDetector for HighPerformanceDetector { | |
483 | 0 | fn detect(&self, content: &str, file_path: &Path) -> Vec<Match> { |
484 | 0 | let mut matches = Vec::new(); |
485 | ||
486 | 0 | for mat in self.ac.find_iter(content) { |
487 | 0 | let pattern_id = mat.pattern(); |
488 | 0 | let pattern_name = &self.pattern_names[pattern_id.as_usize()]; |
489 | ||
490 | // Extract context around the match | |
491 | 0 | let start = mat.start().saturating_sub(15); |
492 | 0 | let end = (mat.end() + 25).min(content.len()); |
493 | 0 | let context = &content[start..end]; |
494 | ||
495 | // Find the line number | |
496 | 0 | let line_start = content[..mat.start()].rfind('\n').map(|pos| pos + 1).unwrap_or(0); |
497 | 0 | let line_number = content[..line_start].lines().count() + 1; |
498 | 0 | let column = mat.start() - line_start + 1; |
499 | ||
500 | 0 | matches.push(Match { |
501 | 0 | file_path: file_path.to_string_lossy().to_string(), |
502 | 0 | line_number, |
503 | 0 | column, |
504 | 0 | pattern: pattern_name.clone(), |
505 | 0 | message: format!("{}: {}", pattern_name, context.trim()), |
506 | 0 | }); |
507 | } | |
508 | ||
509 | 0 | matches |
510 | 0 | } |
511 | } | |
512 | ||
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::PathBuf;

    #[test]
    fn test_hack_detector() {
        let detector = HackDetector;
        let content = "// HACK: temporary fix\nlet x = 1;";
        let path = PathBuf::from("test.rs");
        let found = detector.detect(content, &path);
        assert_eq!(found.len(), 1);
        assert_eq!(found[0].pattern, "HACK");
    }

    #[test]
    fn test_panic_detector_rust_only() {
        // The same source text must only be flagged in a .rs file.
        let detector = PanicDetector;
        let source = "panic!(\"error\");";

        let rust_path = PathBuf::from("test.rs");
        let js_path = PathBuf::from("test.js");

        assert_eq!(detector.detect(source, &rust_path).len(), 1);
        assert_eq!(detector.detect(source, &js_path).len(), 0);
    }

    #[test]
    fn test_unwrap_detector() {
        let detector = UnwrapDetector;
        let found = detector.detect("let value = some_option.unwrap();", &PathBuf::from("test.rs"));
        assert_eq!(found.len(), 1);
        assert_eq!(found[0].pattern, "UNWRAP");
    }

    #[test]
    fn test_case_insensitive_todo() {
        let detector = TodoDetector;
        let content = "todo: fix this\nTODO: another\nTodo: yet another";
        let found = detector.detect(content, &PathBuf::from("test.rs"));
        assert_eq!(found.len(), 3);
    }

    #[test]
    fn test_custom_pattern_detector() {
        let detector = CustomPatternDetector::new("TEST", r"test").unwrap();
        let found = detector.detect("this is a test", &PathBuf::from("test.txt"));
        assert_eq!(found.len(), 1);
        assert_eq!(found[0].pattern, "TEST");
        assert_eq!(found[0].line_number, 1);
        assert!(found[0].message.contains("TEST"));
    }

    #[test]
    fn test_custom_pattern_detector_invalid_regex() {
        // An unterminated character class must fail to compile.
        let result = CustomPatternDetector::new("TEST", r"[invalid");
        assert!(result.is_err());
    }

    #[test]
    fn test_console_log_detector() {
        let detector = ConsoleLogDetector;
        let source = "console.log('debug info');";

        let js_matches = detector.detect(source, &PathBuf::from("test.js"));
        let py_matches = detector.detect(source, &PathBuf::from("test.py"));

        assert_eq!(js_matches.len(), 1);
        assert_eq!(py_matches.len(), 0); // Should only detect in JS/TS files
        assert_eq!(js_matches[0].pattern, "CONSOLE_LOG");
    }

    #[test]
    fn test_debugger_detector() {
        let detector = DebuggerDetector;
        let content = "function test() {\n  debugger;\n  pdb.set_trace();\n}";
        let found = detector.detect(content, &PathBuf::from("test.js"));
        assert_eq!(found.len(), 2);
        assert!(found.iter().all(|m| m.pattern == "DEBUGGER"));
    }

    #[test]
    fn test_phase_detector() {
        let detector = PhaseDetector;
        let content = "// Phase 1 implementation\nlet phase2_code = 'todo';";
        let found = detector.detect(content, &PathBuf::from("test.js"));
        assert_eq!(found.len(), 1);
        assert_eq!(found[0].pattern, "PHASE");
        assert!(found[0].message.contains("Phase 1"));
    }

    #[test]
    fn test_print_detector_multi_language() {
        let detector = PrintDetector;
        let content = "print('debug')\nprintf('test')\necho 'hello'\nconsole.log('js')";
        let found = detector.detect(content, &PathBuf::from("test.py"));

        // Should find all 4 print statements
        assert_eq!(found.len(), 4);
        assert!(found.iter().all(|m| m.pattern == "PRINT"));

        // Check specific patterns are found
        let messages: Vec<String> = found.iter().map(|m| m.message.clone()).collect();
        assert!(messages.iter().any(|m| m.contains("print(")));
        assert!(messages.iter().any(|m| m.contains("printf(")));
        assert!(messages.iter().any(|m| m.contains("console.log(")));
    }

    #[test]
    fn test_unused_var_detector() {
        let detector = UnusedVarDetector;
        let content =
            "let unusedVar = 5; // unused\nvar used = 10;\nconst another = 2; // not used";
        let found = detector.detect(content, &PathBuf::from("test.js"));
        assert_eq!(found.len(), 2);
        assert!(found.iter().all(|m| m.pattern == "UNUSED_VAR"));
    }

    #[test]
    fn test_experimental_detector() {
        let detector = ExperimentalDetector;
        let content = "// experimental feature\n// This is a prototype\n// POC implementation";
        let found = detector.detect(content, &PathBuf::from("test.rs"));
        assert_eq!(found.len(), 3);
        assert!(found.iter().all(|m| m.pattern == "EXPERIMENTAL"));
    }

    #[test]
    fn test_test_detector_skips_test_files() {
        let detector = TestDetector;
        let content = "// test implementation";

        let test_path = PathBuf::from("src/test/test_module.rs");
        let prod_path = PathBuf::from("src/main.rs");

        assert_eq!(detector.detect(content, &test_path).len(), 0); // Should skip test files
        assert_eq!(detector.detect(content, &prod_path).len(), 1); // Should detect in production files
    }
}
Line | Count | Source |
1 | use crate::{Match, PatternDetector, PerformanceMonitor}; | |
2 | use anyhow::Result; | |
3 | use serde::{Deserialize, Serialize}; | |
4 | use std::collections::HashMap; | |
5 | use std::path::PathBuf; | |
6 | use std::sync::Arc; | |
7 | use tokio::sync::Mutex; | |
8 | use tracing::{info, warn}; | |
9 | ||
10 | use std::time::Instant; | |
11 | ||
12 | /// Work unit for distributed processing | |
13 | #[derive(Debug, Clone, Serialize, Deserialize)] | |
14 | pub struct WorkUnit { | |
15 | pub id: String, | |
16 | pub files: Vec<PathBuf>, | |
17 | pub detector_types: Vec<String>, | |
18 | pub priority: u8, // 0-255, higher = more priority | |
19 | pub estimated_duration_ms: u64, | |
20 | } | |
21 | ||
22 | /// Result from processing a work unit | |
23 | #[derive(Debug, Clone, Serialize, Deserialize)] | |
24 | pub struct WorkResult { | |
25 | pub unit_id: String, | |
26 | pub worker_id: String, | |
27 | pub matches: Vec<Match>, | |
28 | pub files_processed: usize, | |
29 | pub processing_time_ms: u64, | |
30 | pub timestamp: u64, | |
31 | pub errors: Vec<String>, | |
32 | } | |
33 | ||
34 | /// Worker node configuration | |
35 | #[derive(Debug, Clone, Serialize, Deserialize)] | |
36 | pub struct WorkerConfig { | |
37 | pub worker_id: String, | |
38 | pub max_concurrent_units: usize, | |
39 | pub supported_detectors: Vec<String>, | |
40 | pub cpu_cores: usize, | |
41 | pub memory_limit_mb: usize, | |
42 | pub endpoint: Option<String>, // For remote workers | |
43 | } | |
44 | ||
45 | /// Distributed scan coordinator with performance monitoring | |
46 | pub struct DistributedCoordinator { | |
47 | workers: Vec<WorkerConfig>, | |
48 | work_queue: Vec<WorkUnit>, | |
49 | completed_work: HashMap<String, WorkResult>, | |
50 | detectors: HashMap<String, Box<dyn PatternDetector>>, | |
51 | monitor: Arc<Mutex<PerformanceMonitor>>, | |
52 | } | |
53 | ||
54 | impl DistributedCoordinator { | |
55 | 3 | pub fn new() -> Self { |
56 | 3 | Self { |
57 | 3 | workers: Vec::new(), |
58 | 3 | work_queue: Vec::new(), |
59 | 3 | completed_work: HashMap::new(), |
60 | 3 | detectors: HashMap::new(), |
61 | 3 | monitor: Arc::new(Mutex::new(PerformanceMonitor::new())), |
62 | 3 | } |
63 | 3 | } |
64 | ||
65 | /// Register a worker node | |
66 | 8 | pub fn register_worker(&mut self, config: WorkerConfig) { |
67 | 8 | info!( |
68 | 0 | "๐ค Registered worker: {} (cores: {}, memory: {}MB)", |
69 | config.worker_id, config.cpu_cores, config.memory_limit_mb | |
70 | ); | |
71 | 8 | self.workers.push(config); |
72 | 8 | } |
73 | ||
74 | /// Register pattern detectors | |
75 | 4 | pub fn register_detector(&mut self, name: String, detector: Box<dyn PatternDetector>) { |
76 | 4 | self.detectors.insert(name, detector); |
77 | 4 | } |
78 | ||
79 | /// Create work units from file list | |
80 | 2 | pub fn create_work_units(&mut self, files: Vec<PathBuf>, batch_size: usize) -> Result<()> { |
81 | 2 | for (unit_id, chunk) in files.chunks(batch_size).enumerate() { |
82 | 2 | let estimated_duration = self.estimate_processing_time(chunk); |
83 | 2 | |
84 | 2 | let work_unit = WorkUnit { |
85 | 2 | id: format!("unit_{}", unit_id), |
86 | 2 | files: chunk.to_vec(), |
87 | 2 | detector_types: self.detectors.keys().cloned().collect(), |
88 | 2 | priority: self.calculate_priority(chunk), |
89 | 2 | estimated_duration_ms: estimated_duration, |
90 | 2 | }; |
91 | 2 | |
92 | 2 | self.work_queue.push(work_unit); |
93 | 2 | } |
94 | ||
95 | // Sort by priority (higher priority first) | |
96 | 2 | self.work_queue.sort_by(|a, b| |
97 | ||
98 | 2 | info!( |
99 | 0 | "๐ฆ Created {} work units from {} files", |
100 | 0 | self.work_queue.len(), |
101 | 0 | files.len() |
102 | ); | |
103 | 2 | Ok(()) |
104 | 2 | } |
105 | ||
106 | /// Distribute and execute work units with performance monitoring | |
107 | 2 | pub async fn execute_distributed_scan(&mut self) -> Result<Vec<Match>> { |
108 | 2 | let start_time = Instant::now(); |
109 | 2 | let total_units = self.work_queue.len(); |
110 | ||
111 | 2 | info!( |
112 | 0 | "๐ Starting distributed scan with {} workers and {} work units", |
113 | 0 | self.workers.len(), |
114 | total_units | |
115 | ); | |
116 | ||
117 | // Start monitoring | |
118 | { | |
119 | 2 | let mut monitor = self.monitor.lock().await; |
120 | 2 | monitor.start_operation("distributed_scan"); |
121 | } | |
122 | ||
123 | 2 | if self.workers.is_empty() { |
124 | // Fallback to local processing | |
125 | 0 | return self.execute_local_fallback().await; |
126 | 2 | } |
127 | ||
128 | // Simulate distributed processing (in real implementation, this would use | |
129 | // actual network communication, message queues, etc.) | |
130 | 2 | self.simulate_distributed_execution().await |
131 | ||
132 | 2 | let total_matches: Vec<Match> = self |
133 | 2 | .completed_work |
134 | 2 | .values() |
135 | 2 | .flat_map(|result| result.matches.clone()) |
136 | 2 | .collect(); |
137 | ||
138 | 2 | let duration = start_time.elapsed(); |
139 | 2 | self.print_execution_summary(duration, total_matches.len()); |
140 | ||
141 | // End monitoring | |
142 | { | |
143 | 2 | let mut monitor = self.monitor.lock().await; |
144 | 2 | monitor.end_operation("distributed_scan").await |
145 | } | |
146 | ||
147 | 2 | Ok(total_matches) |
148 | 2 | } |
149 | ||
150 | /// Get distributed scan statistics | |
151 | 4 | pub fn get_statistics(&self) -> DistributedStats { |
152 | 4 | let total_files: usize = self |
153 | 4 | .completed_work |
154 | 4 | .values() |
155 | 4 | .map(|r| r.files_processed) |
156 | 4 | .sum(); |
157 | ||
158 | 4 | let total_processing_time: u64 = self |
159 | 4 | .completed_work |
160 | 4 | .values() |
161 | 4 | .map(|r| r.processing_time_ms) |
162 | 4 | .sum(); |
163 | ||
164 | 4 | let worker_utilization: HashMap<String, f64> = self |
165 | 4 | .workers |
166 | 4 | .iter() |
167 | 12 | . |
168 | 12 | let worker_results: Vec<&WorkResult> = self |
169 | 12 | .completed_work |
170 | 12 | .values() |
171 | 12 | .filter(|r| r.worker_id == w.worker_id) |
172 | 12 | .collect(); |
173 | ||
174 | 12 | let utilization = if !worker_results.is_empty() { |
175 | 4 | worker_results.len() as f64 / self.work_queue.len() as f64 |
176 | } else { | |
177 | 8 | 0.0 |
178 | }; | |
179 | ||
180 | 12 | (w.worker_id.clone(), utilization) |
181 | 12 | }) |
182 | 4 | .collect(); |
183 | ||
184 | DistributedStats { | |
185 | 4 | total_workers: self.workers.len(), |
186 | 4 | total_work_units: self.work_queue.len(), |
187 | 4 | completed_units: self.completed_work.len(), |
188 | 4 | total_files_processed: total_files, |
189 | 4 | total_processing_time_ms: total_processing_time, |
190 | 4 | worker_utilization, |
191 | 4 | average_unit_size: if !self.work_queue.is_empty() { |
192 | 4 | total_files as f64 / self.work_queue.len() as f64 |
193 | } else { | |
194 | 0 | 0.0 |
195 | }, | |
196 | } | |
197 | 4 | } |
198 | ||
199 | 2 | async fn simulate_distributed_execution(&mut self) -> Result<()> { |
200 | use rayon::prelude::*; | |
201 | ||
202 | // Process work units in parallel (simulating distributed workers) | |
203 | 2 | let results: Vec<WorkResult> = self |
204 | 2 | .work_queue |
205 | 2 | .par_iter() |
206 | 2 | .enumerate() |
207 | 2 | .map(|(i, unit)| { |
208 | 2 | let worker_id = format!("worker_{}", i % self.workers.len()); |
209 | 2 | self.process_work_unit(unit, &worker_id) |
210 | 2 | }) |
211 | 2 | .collect::<Result<Vec<_>>>() |
212 | ||
213 | // Store results | |
214 | 4 | for |
215 | 2 | self.completed_work.insert(result.unit_id.clone(), result); |
216 | 2 | } |
217 | ||
218 | 2 | Ok(()) |
219 | 2 | } |
220 | ||
221 | 2 | fn process_work_unit(&self, unit: &WorkUnit, worker_id: &str) -> Result<WorkResult> { |
222 | 2 | let start_time = Instant::now(); |
223 | 2 | let mut all_matches = Vec::new(); |
224 | 2 | let mut errors = Vec::new(); |
225 | 2 | let mut files_processed = 0; |
226 | ||
227 | 5 | for |
228 | 3 | match std::fs::read_to_string(file_path) { |
229 | 2 | Ok(content) => { |
230 | 6 | for |
231 | 4 | if let Some(detector) = self.detectors.get(detector_name) { |
232 | 4 | let matches = detector.detect(&content, file_path); |
233 | 4 | all_matches.extend(matches); |
234 | 4 |
|
235 | } | |
236 | 2 | files_processed += 1; |
237 | } | |
238 | 1 | Err(e) => { |
239 | 1 | errors.push(format!("Failed to read {}: {}", file_path.display(), e)); |
240 | 1 | } |
241 | } | |
242 | } | |
243 | ||
244 | 2 | let processing_time = start_time.elapsed(); |
245 | ||
246 | Ok(WorkResult { | |
247 | 2 | unit_id: unit.id.clone(), |
248 | 2 | worker_id: worker_id.to_string(), |
249 | 2 | matches: all_matches, |
250 | 2 | files_processed, |
251 | 2 | processing_time_ms: processing_time.as_millis() as u64, |
252 | 2 | timestamp: std::time::SystemTime::now() |
253 | 2 | .duration_since(std::time::UNIX_EPOCH) |
254 | 2 | .as_secs(), |
255 | 2 | errors, |
256 | }) | |
257 | 2 | } |
258 | ||
259 | 0 | async fn execute_local_fallback(&mut self) -> Result<Vec<Match>> { |
260 | 0 | warn!("โ ๏ธ No workers available, falling back to local processing"); |
261 | ||
262 | 0 | let mut all_matches = Vec::new(); |
263 | 0 | for unit in &self.work_queue { |
264 | 0 | let mut result = self.process_work_unit(unit, "local_worker")?; |
265 | 0 | let matches = std::mem::take(&mut result.matches); |
266 | 0 | self.completed_work.insert(unit.id.clone(), result); |
267 | 0 | all_matches.extend(matches); |
268 | } | |
269 | ||
270 | 0 | Ok(all_matches) |
271 | 0 | } |
272 | ||
273 | 2 | fn estimate_processing_time(&self, files: &[PathBuf]) -> u64 { |
274 | // Simple estimation: 1ms per file + size factor | |
275 | 2 | let base_time = files.len() as u64; |
276 | 2 | let size_factor: u64 = files |
277 | 2 | .iter() |
278 | 3 | . |
279 | 3 | . |
280 | 2 | .sum(); |
281 | ||
282 | 2 | base_time + size_factor |
283 | 2 | } |
284 | ||
285 | 2 | fn calculate_priority(&self, files: &[PathBuf]) -> u8 { |
286 | // Higher priority for smaller batches (process quickly) | |
287 | // and files that are likely to have issues | |
288 | 2 | let size_priority = match files.len() { |
289 | 2 | 1..=10 => 200, |
290 | 0 | 11..=50 => 150, |
291 | 0 | 51..=100 => 100, |
292 | 0 | _ => 50, |
293 | }; | |
294 | ||
295 | // Boost priority for certain file types | |
296 | 2 | let type_priority = files |
297 | 2 | .iter() |
298 | 3 | . |
299 | 3 | . |
300 | 3 | . |
301 | 3 | "rs" => |
302 | 1 | "py" | "js" | "ts" => |
303 | 1 | _ => 10, |
304 | 3 | }) |
305 | 2 | .max() |
306 | 2 | .unwrap_or(0); |
307 | ||
308 | 2 | (size_priority + type_priority).min(255) as u8 |
309 | 2 | } |
310 | ||
311 | 2 | fn print_execution_summary(&self, duration: std::time::Duration, total_matches: usize) { |
312 | 2 | info!( |
313 | 2 | info!( |
314 | 2 | info!( |
315 | 2 | info!( |
316 | ||
317 | 2 | let stats = self.get_statistics(); |
318 | 2 | info!( |
319 | 2 | info!( |
320 | ||
321 | // Show worker utilization | |
322 | 8 | for ( |
323 | 6 | info!( |
324 | } | |
325 | 2 | } |
326 | } | |
327 | ||
/// Statistics for distributed scanning
#[derive(Debug, Clone)]
pub struct DistributedStats {
    /// Number of registered workers.
    pub total_workers: usize,
    /// Number of work units created for the scan.
    pub total_work_units: usize,
    /// Number of work units that have finished processing.
    pub completed_units: usize,
    /// Total files processed across all completed units.
    pub total_files_processed: usize,
    /// Sum of per-unit processing times, in milliseconds.
    pub total_processing_time_ms: u64,
    /// Worker id -> fraction of the queued units that worker completed.
    pub worker_utilization: HashMap<String, f64>,
    /// Mean number of files per work unit (0.0 when the queue is empty).
    pub average_unit_size: f64,
}
339 | ||
impl Default for DistributedCoordinator {
    /// Equivalent to [`DistributedCoordinator::new`].
    fn default() -> Self {
        Self::new()
    }
}
345 | ||
#[cfg(test)]
mod tests {
    use super::*;
    use crate::detectors::TodoDetector;
    use tempfile::TempDir;

    /// A fresh coordinator starts with no workers and an empty work queue.
    #[test]
    fn test_distributed_coordinator_creation() {
        let fresh = DistributedCoordinator::new();
        assert_eq!(fresh.workers.len(), 0);
        assert_eq!(fresh.work_queue.len(), 0);
    }

    /// Registering a worker makes it visible in the coordinator's table.
    #[test]
    fn test_worker_registration() {
        let mut coordinator = DistributedCoordinator::new();

        let config = WorkerConfig {
            worker_id: "test_worker".to_string(),
            max_concurrent_units: 4,
            supported_detectors: vec!["TODO".to_string()],
            cpu_cores: 8,
            memory_limit_mb: 4096,
            endpoint: None,
        };
        coordinator.register_worker(config);

        assert_eq!(coordinator.workers.len(), 1);
    }

    /// Splitting a single file with a generous batch size yields exactly one
    /// work unit containing that one file.
    #[test]
    fn test_work_unit_creation() {
        let temp_dir = TempDir::new().unwrap();
        let source = temp_dir.path().join("test.rs");
        std::fs::write(&source, "// TODO: test").unwrap();

        let mut coordinator = DistributedCoordinator::new();
        coordinator.register_detector("TODO".to_string(), Box::new(TodoDetector));
        coordinator.create_work_units(vec![source], 10).unwrap();

        assert_eq!(coordinator.work_queue.len(), 1);
        assert_eq!(coordinator.work_queue[0].files.len(), 1);
    }
}
Line | Count | Source |
1 | use serde::{Deserialize, Serialize}; | |
2 | use std::collections::HashMap; | |
3 | ||
4 | use crate::Severity; | |
5 | ||
/// Enhanced configuration for more flexible pattern detection
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EnhancedScanConfig {
    /// Enabled pattern detectors
    pub enabled_detectors: Vec<DetectorType>,
    /// File extensions to include in scanning
    pub include_extensions: Vec<String>,
    /// File extensions to exclude from scanning
    pub exclude_extensions: Vec<String>,
    /// Paths to exclude from scanning (glob patterns)
    pub exclude_paths: Vec<String>,
    /// Maximum file size to scan (in bytes)
    // NOTE(review): presumably `None` disables the size limit — confirm in
    // the scanner that consumes this config.
    pub max_file_size: Option<usize>,
    /// Custom regex patterns, keyed by pattern name
    pub custom_patterns: HashMap<String, String>,
    /// Severity levels for different pattern types, keyed by pattern name
    pub severity_levels: HashMap<String, Severity>,
}
24 | ||
/// Types of available pattern detectors
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub enum DetectorType {
    // Comment-based patterns
    Todo,
    Fixme,
    Hack,
    Bug,
    Xxx,
    Note,
    Warning,

    // Rust-specific patterns
    Panic,
    Unwrap,
    Expect,
    Unimplemented,
    Unreachable,

    // Performance patterns
    Clone,
    ToString,

    // Security patterns
    Unsafe,

    // Development/Phase patterns
    Dev,
    Debug,
    Test,
    Phase,
    Staging,

    // Non-production code patterns
    ConsoleLog,
    Print,
    Alert,
    Debugger,
    UnusedVar,
    DeadCode,
    Experimental,

    // LLM-specific security patterns
    LLMHallucinatedApi,
    LLMSQLInjection,
    LLMInsecureRandom,
    LLMHardcodedCredentials,
    LLMRustMemorySafety,
    LLMCryptoAntipattern,
    LLMXSSInjection,
    LLMFilesystemSecurity,
    LLMContextConfusion,

    // LLM-specific quality patterns
    LLMAsyncAntipattern,
    LLMPerformanceIssue,
    LLMErrorHandling,
    LLMOverengineering,
    LLMConfigAntipattern,
    LLMDatabaseAntipattern,
    LLMJSIssues,
    LLMPythonIssues,
    LLMGeneratedComments,

    // Advanced LLM-specific patterns
    LLMAIModelHallucination,
    LLMIncorrectAsync,
    LLMSecurityAntipattern,
    LLMDBAntipattern,
    LLMErrorHandlingMistake,
    LLMPerformanceMistake,
    LLMTypeMistake,

    // Comprehensive LLM detector
    LLMComprehensive,

    /// Custom pattern with name
    // The String is the user-supplied pattern name (used as the key into
    // `custom_patterns` / `severity_levels`, per the tests in this module).
    Custom(String),
}
104 | ||
105 | impl Default for EnhancedScanConfig { | |
106 | 0 | fn default() -> Self { |
107 | 0 | let mut severity_levels = HashMap::new(); |
108 | 0 | severity_levels.insert("TODO".to_string(), Severity::Low); |
109 | 0 | severity_levels.insert("FIXME".to_string(), Severity::Medium); |
110 | 0 | severity_levels.insert("HACK".to_string(), Severity::High); |
111 | 0 | severity_levels.insert("BUG".to_string(), Severity::High); |
112 | 0 | severity_levels.insert("XXX".to_string(), Severity::Critical); |
113 | 0 | severity_levels.insert("PANIC".to_string(), Severity::High); |
114 | 0 | severity_levels.insert("UNWRAP".to_string(), Severity::Medium); |
115 | 0 | severity_levels.insert("UNSAFE".to_string(), Severity::High); |
116 | ||
117 | // Development/Phase patterns | |
118 | 0 | severity_levels.insert("DEV".to_string(), Severity::High); |
119 | 0 | severity_levels.insert("DEBUG".to_string(), Severity::Medium); |
120 | 0 | severity_levels.insert("TEST".to_string(), Severity::Medium); |
121 | 0 | severity_levels.insert("PHASE".to_string(), Severity::Medium); |
122 | 0 | severity_levels.insert("STAGING".to_string(), Severity::High); |
123 | ||
124 | // Non-production code patterns | |
125 | 0 | severity_levels.insert("CONSOLE_LOG".to_string(), Severity::High); |
126 | 0 | severity_levels.insert("PRINT".to_string(), Severity::Medium); |
127 | 0 | severity_levels.insert("ALERT".to_string(), Severity::High); |
128 | 0 | severity_levels.insert("DEBUGGER".to_string(), Severity::Critical); |
129 | 0 | severity_levels.insert("UNUSED_VAR".to_string(), Severity::Low); |
130 | 0 | severity_levels.insert("DEAD_CODE".to_string(), Severity::Medium); |
131 | 0 | severity_levels.insert("EXPERIMENTAL".to_string(), Severity::Medium); |
132 | ||
133 | // LLM-specific security patterns (high priority) | |
134 | 0 | severity_levels.insert("LLM_HALLUCINATED_API".to_string(), Severity::High); |
135 | 0 | severity_levels.insert("LLM_SQL_INJECTION".to_string(), Severity::Critical); |
136 | 0 | severity_levels.insert("LLM_INSECURE_RANDOM".to_string(), Severity::High); |
137 | 0 | severity_levels.insert("LLM_HARDCODED_CREDENTIALS".to_string(), Severity::Critical); |
138 | 0 | severity_levels.insert("LLM_RUST_MEMORY_SAFETY".to_string(), Severity::High); |
139 | 0 | severity_levels.insert("LLM_CRYPTO_ANTIPATTERN".to_string(), Severity::High); |
140 | 0 | severity_levels.insert("LLM_XSS_INJECTION".to_string(), Severity::Critical); |
141 | 0 | severity_levels.insert("LLM_FILESYSTEM_SECURITY".to_string(), Severity::High); |
142 | 0 | severity_levels.insert("LLM_CONTEXT_CONFUSION".to_string(), Severity::High); |
143 | ||
144 | // LLM-specific quality patterns (medium priority) | |
145 | 0 | severity_levels.insert("LLM_ASYNC_ANTIPATTERN".to_string(), Severity::Medium); |
146 | 0 | severity_levels.insert("LLM_PERFORMANCE_ISSUE".to_string(), Severity::Medium); |
147 | 0 | severity_levels.insert("LLM_ERROR_HANDLING".to_string(), Severity::Medium); |
148 | 0 | severity_levels.insert("LLM_OVERENGINEERING".to_string(), Severity::Low); |
149 | 0 | severity_levels.insert("LLM_CONFIG_ANTIPATTERN".to_string(), Severity::Medium); |
150 | 0 | severity_levels.insert("LLM_DATABASE_ANTIPATTERN".to_string(), Severity::Medium); |
151 | 0 | severity_levels.insert("LLM_JS_ISSUES".to_string(), Severity::Medium); |
152 | 0 | severity_levels.insert("LLM_PYTHON_ISSUES".to_string(), Severity::High); |
153 | 0 | severity_levels.insert("LLM_GENERATED_COMMENT".to_string(), Severity::Info); |
154 | ||
155 | // Advanced LLM-specific patterns | |
156 | 0 | severity_levels.insert("LLM_AI_MODEL_HALLUCINATION".to_string(), Severity::High); |
157 | 0 | severity_levels.insert("LLM_INCORRECT_ASYNC".to_string(), Severity::Medium); |
158 | 0 | severity_levels.insert("LLM_SECURITY_ANTIPATTERN".to_string(), Severity::Critical); |
159 | 0 | severity_levels.insert("LLM_DB_ANTIPATTERN".to_string(), Severity::High); |
160 | 0 | severity_levels.insert("LLM_ERROR_HANDLING_MISTAKE".to_string(), Severity::Medium); |
161 | 0 | severity_levels.insert("LLM_PERFORMANCE_MISTAKE".to_string(), Severity::Medium); |
162 | 0 | severity_levels.insert("LLM_TYPE_MISTAKE".to_string(), Severity::Low); |
163 | ||
164 | 0 | Self { |
165 | 0 | enabled_detectors: vec![DetectorType::Todo, DetectorType::Fixme], |
166 | 0 | include_extensions: vec![ |
167 | 0 | "rs".to_string(), |
168 | 0 | "py".to_string(), |
169 | 0 | "js".to_string(), |
170 | 0 | "ts".to_string(), |
171 | 0 | "tsx".to_string(), |
172 | 0 | "jsx".to_string(), |
173 | 0 | "java".to_string(), |
174 | 0 | "cs".to_string(), |
175 | 0 | "cpp".to_string(), |
176 | 0 | "cxx".to_string(), |
177 | 0 | "c".to_string(), |
178 | 0 | "h".to_string(), |
179 | 0 | "hpp".to_string(), |
180 | 0 | "go".to_string(), |
181 | 0 | "php".to_string(), |
182 | 0 | "rb".to_string(), |
183 | 0 | "kt".to_string(), |
184 | 0 | "swift".to_string(), |
185 | 0 | "dart".to_string(), |
186 | 0 | "scala".to_string(), |
187 | 0 | "sh".to_string(), |
188 | 0 | "ps1".to_string(), |
189 | 0 | "sql".to_string(), |
190 | 0 | "html".to_string(), |
191 | 0 | "vue".to_string(), |
192 | 0 | "svelte".to_string(), |
193 | 0 | "md".to_string(), |
194 | 0 | "txt".to_string(), |
195 | 0 | "yml".to_string(), |
196 | 0 | "yaml".to_string(), |
197 | 0 | "json".to_string(), |
198 | 0 | "toml".to_string(), |
199 | 0 | ], |
200 | 0 | exclude_extensions: vec![ |
201 | 0 | "exe".to_string(), |
202 | 0 | "dll".to_string(), |
203 | 0 | "so".to_string(), |
204 | 0 | "bin".to_string(), |
205 | 0 | "png".to_string(), |
206 | 0 | "jpg".to_string(), |
207 | 0 | "jpeg".to_string(), |
208 | 0 | "gif".to_string(), |
209 | 0 | "pdf".to_string(), |
210 | 0 | "zip".to_string(), |
211 | 0 | ], |
212 | 0 | exclude_paths: vec![ |
213 | 0 | "target/*".to_string(), |
214 | 0 | "node_modules/*".to_string(), |
215 | 0 | ".git/*".to_string(), |
216 | 0 | "*.lock".to_string(), |
217 | 0 | "vendor/*".to_string(), |
218 | 0 | "build/*".to_string(), |
219 | 0 | ], |
220 | 0 | max_file_size: Some(1024 * 1024), // 1MB default |
221 | 0 | custom_patterns: HashMap::new(), |
222 | 0 | severity_levels, |
223 | 0 | } |
224 | 0 | } |
225 | } | |
226 | ||
#[cfg(test)]
mod tests {
    use super::*;

    // Default construction: detectors, extension filters and size limit.
    #[test]
    fn test_enhanced_scan_config_default() {
        let config = EnhancedScanConfig::default();

        assert!(!config.enabled_detectors.is_empty());
        assert!(config.enabled_detectors.contains(&DetectorType::Todo));
        assert!(config.enabled_detectors.contains(&DetectorType::Fixme));
        assert!(config.include_extensions.contains(&"rs".to_string()));
        assert!(config.exclude_paths.contains(&"target/*".to_string()));
        assert_eq!(config.max_file_size, Some(1024 * 1024));
    }

    // PartialEq: unit variants compare by variant, Custom by payload.
    #[test]
    fn test_detector_type_equality() {
        assert_eq!(DetectorType::Todo, DetectorType::Todo);
        assert_ne!(DetectorType::Todo, DetectorType::Fixme);
        assert_eq!(
            DetectorType::Custom("test".to_string()),
            DetectorType::Custom("test".to_string())
        );
        assert_ne!(
            DetectorType::Custom("test1".to_string()),
            DetectorType::Custom("test2".to_string())
        );
    }

    // serde round-trip for both unit and payload-carrying variants.
    #[test]
    fn test_detector_type_serialization() {
        let detector = DetectorType::Todo;
        let json = serde_json::to_string(&detector).unwrap();
        let deserialized: DetectorType = serde_json::from_str(&json).unwrap();
        assert_eq!(detector, deserialized);

        // Test custom detector
        let custom_detector = DetectorType::Custom("my_pattern".to_string());
        let json = serde_json::to_string(&custom_detector).unwrap();
        let deserialized: DetectorType = serde_json::from_str(&json).unwrap();
        assert_eq!(custom_detector, deserialized);
    }

    // serde round-trip of the whole config preserves every field compared.
    #[test]
    fn test_enhanced_config_serialization() {
        let config = EnhancedScanConfig::default();
        let json = serde_json::to_string(&config).unwrap();
        let deserialized: EnhancedScanConfig = serde_json::from_str(&json).unwrap();

        assert_eq!(config.enabled_detectors, deserialized.enabled_detectors);
        assert_eq!(config.include_extensions, deserialized.include_extensions);
        assert_eq!(config.exclude_paths, deserialized.exclude_paths);
        assert_eq!(config.max_file_size, deserialized.max_file_size);
    }

    // Spot-checks of the default severity table across pattern families.
    #[test]
    fn test_severity_levels_defaults() {
        let config = EnhancedScanConfig::default();

        // Test basic severity levels
        assert_eq!(config.severity_levels.get("TODO"), Some(&Severity::Low));
        assert_eq!(config.severity_levels.get("FIXME"), Some(&Severity::Medium));
        assert_eq!(config.severity_levels.get("HACK"), Some(&Severity::High));
        assert_eq!(config.severity_levels.get("XXX"), Some(&Severity::Critical));

        // Test LLM-specific patterns
        assert_eq!(
            config.severity_levels.get("LLM_SQL_INJECTION"),
            Some(&Severity::Critical)
        );
        assert_eq!(
            config.severity_levels.get("LLM_HARDCODED_CREDENTIALS"),
            Some(&Severity::Critical)
        );
        assert_eq!(
            config.severity_levels.get("LLM_XSS_INJECTION"),
            Some(&Severity::Critical)
        );

        // Test development patterns
        assert_eq!(config.severity_levels.get("DEV"), Some(&Severity::High));
        assert_eq!(
            config.severity_levels.get("CONSOLE_LOG"),
            Some(&Severity::High)
        );
        assert_eq!(
            config.severity_levels.get("DEBUGGER"),
            Some(&Severity::Critical)
        );
    }

    // Clone/PartialEq sanity across representative variant groups.
    #[test]
    fn test_all_detector_types_coverage() {
        // Test all basic detector types
        let basic_detectors = vec![
            DetectorType::Todo,
            DetectorType::Fixme,
            DetectorType::Hack,
            DetectorType::Bug,
            DetectorType::Xxx,
            DetectorType::Note,
            DetectorType::Warning,
        ];

        for detector in basic_detectors {
            let cloned = detector.clone();
            assert_eq!(detector, cloned);
        }

        // Test Rust-specific detectors
        let rust_detectors = vec![
            DetectorType::Panic,
            DetectorType::Unwrap,
            DetectorType::Expect,
            DetectorType::Unimplemented,
            DetectorType::Unreachable,
        ];

        for detector in rust_detectors {
            let cloned = detector.clone();
            assert_eq!(detector, cloned);
        }

        // Test LLM-specific detectors
        let llm_detectors = vec![
            DetectorType::LLMHallucinatedApi,
            DetectorType::LLMSQLInjection,
            DetectorType::LLMInsecureRandom,
            DetectorType::LLMHardcodedCredentials,
            DetectorType::LLMComprehensive,
        ];

        for detector in llm_detectors {
            let cloned = detector.clone();
            assert_eq!(detector, cloned);
        }
    }

    // Custom patterns and their severities are stored under the same key.
    #[test]
    fn test_config_with_custom_patterns() {
        let mut config = EnhancedScanConfig::default();
        config
            .custom_patterns
            .insert("CUSTOM_PATTERN".to_string(), r"CUSTOM:\s*(.+)".to_string());
        config
            .severity_levels
            .insert("CUSTOM_PATTERN".to_string(), Severity::Medium);

        assert_eq!(config.custom_patterns.len(), 1);
        assert_eq!(
            config.severity_levels.get("CUSTOM_PATTERN"),
            Some(&Severity::Medium)
        );

        // Test that custom pattern is properly stored
        assert_eq!(
            config.custom_patterns.get("CUSTOM_PATTERN"),
            Some(&r"CUSTOM:\s*(.+)".to_string())
        );
    }

    // Include list covers common languages; exclude list covers binaries.
    #[test]
    fn test_file_extension_filters() {
        let config = EnhancedScanConfig::default();

        // Verify common programming languages are included
        assert!(config.include_extensions.contains(&"rs".to_string()));
        assert!(config.include_extensions.contains(&"py".to_string()));
        assert!(config.include_extensions.contains(&"js".to_string()));
        assert!(config.include_extensions.contains(&"java".to_string()));
        assert!(config.include_extensions.contains(&"go".to_string()));

        // Verify binary files are excluded
        assert!(config.exclude_extensions.contains(&"exe".to_string()));
        assert!(config.exclude_extensions.contains(&"dll".to_string()));
        assert!(config.exclude_extensions.contains(&"png".to_string()));
        assert!(config.exclude_extensions.contains(&"zip".to_string()));
    }

    // Build/VCS/vendor directories are excluded by default glob patterns.
    #[test]
    fn test_path_exclusion_patterns() {
        let config = EnhancedScanConfig::default();

        // Verify common build directories are excluded
        assert!(config.exclude_paths.contains(&"target/*".to_string()));
        assert!(config.exclude_paths.contains(&"node_modules/*".to_string()));
        assert!(config.exclude_paths.contains(&".git/*".to_string()));
        assert!(config.exclude_paths.contains(&"build/*".to_string()));
        assert!(config.exclude_paths.contains(&"vendor/*".to_string()));
    }

    // Clone is field-wise deep: every field compares equal afterwards.
    #[test]
    fn test_config_clone() {
        let config = EnhancedScanConfig::default();
        let cloned = config.clone();

        assert_eq!(config.enabled_detectors, cloned.enabled_detectors);
        assert_eq!(config.include_extensions, cloned.include_extensions);
        assert_eq!(config.exclude_extensions, cloned.exclude_extensions);
        assert_eq!(config.exclude_paths, cloned.exclude_paths);
        assert_eq!(config.max_file_size, cloned.max_file_size);
        assert_eq!(config.custom_patterns, cloned.custom_patterns);
        assert_eq!(config.severity_levels, cloned.severity_levels);
    }

    // Security-relevant patterns must default to at least High severity.
    #[test]
    fn test_llm_security_patterns_comprehensive() {
        let config = EnhancedScanConfig::default();

        // Verify all critical security patterns are marked as Critical or High
        let critical_patterns = vec![
            "LLM_SQL_INJECTION",
            "LLM_HARDCODED_CREDENTIALS",
            "LLM_XSS_INJECTION",
            "DEBUGGER",
            "XXX",
        ];

        for pattern in critical_patterns {
            let severity = config.severity_levels.get(pattern);
            assert!(
                severity == Some(&Severity::Critical) || severity == Some(&Severity::High),
                "Pattern {} should be Critical or High severity, got: {:?}",
                pattern,
                severity
            );
        }
    }
}
Line | Count | Source |
1 | use crate::{Match, PatternDetector}; | |
2 | use anyhow::Result; | |
3 | ||
4 | use serde::{Deserialize, Serialize}; | |
5 | use std::collections::HashMap; | |
6 | use std::path::{Path, PathBuf}; | |
7 | use std::time::{SystemTime, UNIX_EPOCH}; | |
8 | ||
/// File metadata for incremental scanning
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileMetadata {
    /// Path of the tracked file.
    pub path: PathBuf,
    /// Filesystem modification time, in seconds since the Unix epoch.
    pub modified_time: u64,
    /// File size in bytes.
    pub size: u64,
    /// Quick hash of the file contents (populated for files under 2 MB;
    /// larger files get a size-derived pseudo-hash instead).
    pub hash: Option<String>,
    /// When this file was last scanned, in seconds since the Unix epoch.
    pub last_scan_time: u64,
    /// Number of matches found during the last scan of this file.
    pub match_count: usize,
    pub content_hash: Option<String>, // For more accurate change detection
    pub detector_hash: Option<String>, // Hash of detector configuration
}
21 | ||
/// Incremental scan state persistence
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct IncrementalState {
    /// Timestamp (seconds since Unix epoch) of the last full rescan;
    /// 0 forces a full rescan on the next run.
    pub last_full_scan: u64,
    /// Per-file metadata from previous scans, used to skip unchanged files.
    pub file_metadata: HashMap<PathBuf, FileMetadata>,
    /// Results of recent scans (the scanner caps this at 100 entries).
    pub scan_history: Vec<IncrementalScanResult>,
}
29 | ||
/// Result of an incremental scan
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct IncrementalScanResult {
    /// When the scan ran, in seconds since the Unix epoch.
    pub timestamp: u64,
    /// Files actually scanned (new or changed).
    pub files_scanned: usize,
    /// Files skipped because they were unchanged since the last scan.
    pub files_skipped: usize,
    /// Previously-seen files that had changed (mtime or size differed).
    pub files_modified: usize,
    /// Files seen for the first time.
    pub files_added: usize,
    /// Tracked files that no longer exist on disk.
    pub files_removed: usize,
    /// Total matches produced by this scan.
    pub total_matches: usize,
    /// Wall-clock duration of the scan, in milliseconds.
    pub scan_duration_ms: u64,
}
42 | ||
/// Incremental scanner that only scans changed files
pub struct IncrementalScanner {
    /// Detectors applied to every file that needs (re)scanning.
    detectors: Vec<Box<dyn PatternDetector>>,
    /// Persistent state: file metadata and scan history.
    state: IncrementalState,
    /// Location where `state` is persisted (JSON).
    state_file: PathBuf,
    force_rescan_threshold: u64, // Days after which to force full rescan
}
50 | ||
51 | impl IncrementalScanner { | |
52 | /// Create a new incremental scanner | |
53 | 4 | pub fn new(detectors: Vec<Box<dyn PatternDetector>>, state_file: PathBuf) -> Result<Self> { |
54 | 4 | let state = if state_file.exists() { |
55 | 2 | let content = std::fs::read_to_string(&state_file) |
56 | 2 | serde_json::from_str(&content).unwrap_or_default() |
57 | } else { | |
58 | 2 | IncrementalState::default() |
59 | }; | |
60 | ||
61 | 4 | Ok(Self { |
62 | 4 | detectors, |
63 | 4 | state, |
64 | 4 | state_file, |
65 | 4 | force_rescan_threshold: 7, // 7 days |
66 | 4 | }) |
67 | 4 | } |
68 | ||
69 | /// Perform incremental scan | |
70 | 4 | pub fn scan_incremental(&mut self, root: &Path) -> Result<(Vec<Match>, IncrementalScanResult)> { |
71 | 4 | let start_time = std::time::Instant::now(); |
72 | 4 | let scan_timestamp = SystemTime::now().duration_since(UNIX_EPOCH) |
73 | ||
74 | 4 | let mut all_matches = Vec::new(); |
75 | 4 | let mut files_scanned = 0; |
76 | 4 | let mut files_skipped = 0; |
77 | 4 | let mut files_modified = 0; |
78 | 4 | let mut files_added = 0; |
79 | 4 | let mut files_removed = 0; |
80 | ||
81 | // Check if we need a full rescan | |
82 | 4 | let days_since_full_scan = (scan_timestamp - self.state.last_full_scan) / (24 * 60 * 60); |
83 | 4 | let force_full_scan = days_since_full_scan > self.force_rescan_threshold; |
84 | ||
85 | 4 | if force_full_scan { |
86 | 2 | println!( |
87 | 2 | "๐ Performing full rescan (last full scan: {} days ago)", |
88 | 2 | days_since_full_scan |
89 | 2 | ); |
90 | 2 | self.state.last_full_scan = scan_timestamp; |
91 | 2 | self.state.file_metadata.clear(); |
92 | 2 | } |
93 | ||
94 | // Collect current files | |
95 | 4 | let current_files = self.collect_files(root) |
96 | 4 | let mut current_file_set = std::collections::HashSet::new(); |
97 | ||
98 | 14 | for |
99 | 10 | current_file_set.insert(file_path.clone()); |
100 | ||
101 | 10 | if let Some(metadata) = self.get_file_metadata(&file_path) |
102 | 10 | let existing_metadata = self.state.file_metadata.get(&file_path); |
103 | ||
104 | 10 | let needs_scan = match existing_metadata { |
105 | 2 | Some(existing) => { |
106 | // Check if file has been modified | |
107 | 2 | existing.modified_time != metadata.modified_time |
108 | 2 | || existing.size != metadata.size |
109 | 1 | || force_full_scan |
110 | } | |
111 | None => { | |
112 | // New file | |
113 | 8 | files_added += 1; |
114 | 8 | true |
115 | } | |
116 | }; | |
117 | ||
118 | 10 | if needs_scan { |
119 | 9 | if existing_metadata.is_some() { |
120 | 1 | files_modified += 1; |
121 | 8 | } |
122 | ||
123 | // Scan the file - skip if not valid UTF-8 (like binary files) | |
124 | 9 | let |
125 | 5 | Ok(content) => content, |
126 | 4 | Err(_) => continue, // Skip files that can't be read as UTF-8 |
127 | }; | |
128 | 5 | let file_matches: Vec<Match> = self |
129 | 5 | .detectors |
130 | 5 | .iter() |
131 | 10 | . |
132 | 5 | .collect(); |
133 | ||
134 | 5 | let updated_metadata = FileMetadata { |
135 | 5 | path: file_path.clone(), |
136 | 5 | modified_time: metadata.modified_time, |
137 | 5 | size: metadata.size, |
138 | 5 | hash: metadata.hash, |
139 | 5 | last_scan_time: scan_timestamp, |
140 | 5 | match_count: file_matches.len(), |
141 | 5 | content_hash: metadata.content_hash, |
142 | 5 | detector_hash: metadata.detector_hash, |
143 | 5 | }; |
144 | ||
145 | 5 | self.state.file_metadata.insert(file_path, updated_metadata); |
146 | 5 | all_matches.extend(file_matches); |
147 | 5 | files_scanned += 1; |
148 | 1 | } else { |
149 | 1 | // File unchanged, use cached results |
150 | 1 | files_skipped += 1; |
151 | 1 | |
152 | 1 | // For complete results, we'd need to store and retrieve cached matches |
153 | 1 | // For now, we'll just note that the file was skipped |
154 | 1 | } |
155 | 0 | } |
156 | } | |
157 | ||
158 | // Find removed files | |
159 | 4 | let existing_files: Vec<PathBuf> = self.state.file_metadata.keys().cloned().collect(); |
160 | 10 | for |
161 | 6 | if !current_file_set.contains(&existing_file) { |
162 | 0 | self.state.file_metadata.remove(&existing_file); |
163 | 0 | files_removed += 1; |
164 | 6 | } |
165 | } | |
166 | ||
167 | 4 | let scan_duration = start_time.elapsed(); |
168 | 4 | let result = IncrementalScanResult { |
169 | 4 | timestamp: scan_timestamp, |
170 | 4 | files_scanned, |
171 | 4 | files_skipped, |
172 | 4 | files_modified, |
173 | 4 | files_added, |
174 | 4 | files_removed, |
175 | 4 | total_matches: all_matches.len(), |
176 | 4 | scan_duration_ms: scan_duration.as_millis() as u64, |
177 | 4 | }; |
178 | ||
179 | // Save state | |
180 | 4 | self.save_state() |
181 | ||
182 | // Update scan history | |
183 | 4 | self.state.scan_history.push(result.clone()); |
184 | 4 | if self.state.scan_history.len() > 100 { |
185 | 0 | self.state.scan_history.remove(0); // Keep last 100 scans |
186 | 4 | } |
187 | ||
188 | 4 | println!("๐ Incremental scan completed:"); |
189 | 4 | println!( |
190 | 4 | " Files scanned: {} | Skipped: {} | Modified: {} | Added: {} | Removed: {}", |
191 | files_scanned, files_skipped, files_modified, files_added, files_removed | |
192 | ); | |
193 | 4 | println!( |
194 | 4 | " Speed improvement: {:.1}x faster than full scan", |
195 | 4 | self.calculate_speedup(files_scanned, files_skipped) |
196 | ); | |
197 | ||
198 | 4 | Ok((all_matches, result)) |
199 | 4 | } |
200 | ||
    /// Force a full rescan on next scan
    ///
    /// Resets the full-scan timestamp and drops all cached file metadata, so
    /// the next `scan_incremental` call treats every file as new.
    pub fn force_full_rescan(&mut self) {
        self.state.last_full_scan = 0;
        self.state.file_metadata.clear();
    }
206 | ||
207 | /// Get incremental scan statistics | |
208 | 0 | pub fn get_statistics(&self) -> IncrementalStats { |
209 | 0 | let recent_scans = self |
210 | 0 | .state |
211 | 0 | .scan_history |
212 | 0 | .iter() |
213 | 0 | .rev() |
214 | 0 | .take(10) |
215 | 0 | .collect::<Vec<_>>(); |
216 | ||
217 | 0 | let avg_speedup = if !recent_scans.is_empty() { |
218 | 0 | recent_scans |
219 | 0 | .iter() |
220 | 0 | .map(|scan| self.calculate_speedup(scan.files_scanned, scan.files_skipped)) |
221 | 0 | .sum::<f64>() |
222 | 0 | / recent_scans.len() as f64 |
223 | } else { | |
224 | 0 | 1.0 |
225 | }; | |
226 | ||
227 | IncrementalStats { | |
228 | 0 | total_files_tracked: self.state.file_metadata.len(), |
229 | 0 | last_scan_time: recent_scans.first().map(|s| s.timestamp), |
230 | 0 | average_speedup: avg_speedup, |
231 | 0 | cache_hit_rate: if !recent_scans.is_empty() { |
232 | 0 | let total_files = recent_scans |
233 | 0 | .iter() |
234 | 0 | .map(|s| s.files_scanned + s.files_skipped) |
235 | 0 | .sum::<usize>(); |
236 | 0 | let total_skipped = recent_scans.iter().map(|s| s.files_skipped).sum::<usize>(); |
237 | 0 | if total_files > 0 { |
238 | 0 | total_skipped as f64 / total_files as f64 |
239 | } else { | |
240 | 0 | 0.0 |
241 | } | |
242 | } else { | |
243 | 0 | 0.0 |
244 | }, | |
245 | 0 | scan_history_count: self.state.scan_history.len(), |
246 | } | |
247 | 0 | } |
248 | ||
249 | 4 | fn collect_files(&self, root: &Path) -> Result<Vec<PathBuf>> { |
250 | use ignore::WalkBuilder; | |
251 | ||
252 | 4 | let mut files = Vec::new(); |
253 | 14 | for entry in |
254 | 14 | let entry = entry |
255 | 14 | if entry.file_type().is_some_and(|ft| ft.is_file()) { |
256 | 10 | files.push(entry.path().to_path_buf()); |
257 | 10 |
|
258 | } | |
259 | 4 | Ok(files) |
260 | 4 | } |
261 | ||
    /// Builds a `FileMetadata` snapshot for `path`, used to decide whether a
    /// file changed since the last scan.
    ///
    /// Returns `Ok(None)` when the file's metadata cannot be read (e.g. the
    /// file disappeared between collection and inspection).
    fn get_file_metadata(&self, path: &Path) -> Result<Option<FileMetadata>> {
        if let Ok(metadata) = std::fs::metadata(path) {
            // NOTE(review): this expression appears truncated in this view —
            // presumably the SystemTime is converted to an epoch value here;
            // confirm against the original source.
            let modified_time = metadata.modified()

            // Calculate content hash for accurate change detection
            let (hash, content_hash) = if metadata.len() < 2 * 1024 * 1024 {
                // Hash files < 2MB
                // Hash failures are tolerated (stored as None) rather than
                // aborting the scan.
                let content_hash = self.calculate_content_hash(path).ok();
                let quick_hash = self.calculate_file_hash(path).ok();
                (quick_hash, content_hash)
            } else {
                // For larger files, use size + modified time as hash
                let size_hash = format!("{:x}", metadata.len());
                (Some(size_hash), None)
            };

            // Calculate detector configuration hash for cache invalidation
            let detector_hash = self.calculate_detector_hash();

            // last_scan_time / match_count start at zero; they are filled in
            // by the scan bookkeeping after the file is actually processed.
            Ok(Some(FileMetadata {
                path: path.to_path_buf(),
                modified_time,
                size: metadata.len(),
                hash,
                last_scan_time: 0,
                match_count: 0,
                content_hash,
                detector_hash: Some(detector_hash),
            }))
        } else {
            Ok(None)
        }
    }
295 | ||
296 | 10 | fn calculate_file_hash(&self, path: &Path) -> Result<String> { |
297 | use std::collections::hash_map::DefaultHasher; | |
298 | use std::hash::{Hash, Hasher}; | |
299 | ||
300 | 10 | let content = std::fs::read(path) |
301 | 10 | let mut hasher = DefaultHasher::new(); |
302 | 10 | content.hash(&mut hasher); |
303 | 10 | Ok(format!("{:x}", hasher.finish())) |
304 | 10 | } |
305 | ||
306 | 10 | fn calculate_content_hash(&self, path: &Path) -> Result<String> { |
307 | use std::collections::hash_map::DefaultHasher; | |
308 | use std::hash::{Hash, Hasher}; | |
309 | ||
310 | 10 | let |
311 | 6 | let mut hasher = DefaultHasher::new(); |
312 | 6 | content.hash(&mut hasher); |
313 | 6 | Ok(format!("{:x}", hasher.finish())) |
314 | 10 | } |
315 | ||
316 | 10 | fn calculate_detector_hash(&self) -> String { |
317 | use std::collections::hash_map::DefaultHasher; | |
318 | use std::hash::{Hash, Hasher}; | |
319 | ||
320 | 10 | let mut hasher = DefaultHasher::new(); |
321 | // Hash detector count and types for cache invalidation when detectors change | |
322 | 10 | self.detectors.len().hash(&mut hasher); |
323 | 20 | for (i, _detector) in |
324 | 20 | i.hash(&mut hasher); // Hash index as approximation |
325 | 20 | } |
326 | 10 | format!("{:x}", hasher.finish()) |
327 | 10 | } |
328 | ||
329 | 4 | fn calculate_speedup(&self, files_scanned: usize, files_skipped: usize) -> f64 { |
330 | 4 | let total_files = files_scanned + files_skipped; |
331 | 4 | if total_files > 0 && files_scanned > 0 { |
332 | 4 | total_files as f64 / files_scanned as f64 |
333 | } else { | |
334 | 0 | 1.0 |
335 | } | |
336 | 4 | } |
337 | ||
338 | 4 | fn save_state(&self) -> Result<()> { |
339 | 4 | let content = serde_json::to_string_pretty(&self.state) |
340 | 4 | std::fs::write(&self.state_file, content) |
341 | 4 | Ok(()) |
342 | 4 | } |
343 | } | |
344 | ||
/// Statistics for incremental scanning
#[derive(Debug, Clone)]
pub struct IncrementalStats {
    /// Number of files currently tracked in the persisted scan state.
    pub total_files_tracked: usize,
    /// Timestamp of the most recent recorded scan, if any history exists.
    pub last_scan_time: Option<u64>,
    /// Mean speedup over (up to) the last 10 scans; 1.0 when no history.
    pub average_speedup: f64,
    /// Fraction of files skipped (cache hits) over recent scans, in 0.0..=1.0.
    pub cache_hit_rate: f64,
    /// Total number of scans recorded in the history.
    pub scan_history_count: usize,
}
354 | ||
#[cfg(test)]
mod tests {
    use super::*;
    use crate::detectors::TodoDetector;
    use tempfile::{NamedTempFile, TempDir};

    // Smoke test: constructing a scanner backed by a fresh state file succeeds.
    #[test]
    fn test_incremental_scanner_creation() {
        let temp_file = NamedTempFile::new().unwrap();
        let detectors: Vec<Box<dyn PatternDetector>> = vec![Box::new(TodoDetector)];

        let scanner = IncrementalScanner::new(detectors, temp_file.path().to_path_buf());
        assert!(scanner.is_ok());
    }

    // End-to-end change tracking: the first scan processes the file, the
    // second (unchanged) scan must serve it from the metadata cache.
    #[test]
    fn test_file_metadata_tracking() {
        let temp_dir = TempDir::new().unwrap();
        let test_file = temp_dir.path().join("test.rs");
        std::fs::write(&test_file, "// TODO: test").unwrap();

        let temp_state = NamedTempFile::new().unwrap();
        let detectors: Vec<Box<dyn PatternDetector>> = vec![Box::new(TodoDetector)];
        let mut scanner =
            IncrementalScanner::new(detectors, temp_state.path().to_path_buf()).unwrap();

        // First scan
        let (matches1, result1) = scanner.scan_incremental(temp_dir.path()).unwrap();
        assert_eq!(result1.files_added, 1);
        assert_eq!(result1.files_scanned, 1);
        assert_eq!(matches1.len(), 1);

        // Second scan without changes - should skip file
        let (_matches2, result2) = scanner.scan_incremental(temp_dir.path()).unwrap();
        assert_eq!(result2.files_skipped, 1);
        assert_eq!(result2.files_scanned, 0);
    }
}
Line | Count | Source |
1 | use anyhow::Result; | |
2 | use dashmap::DashMap; | |
3 | use ignore::WalkBuilder; | |
4 | use rayon::prelude::*; | |
5 | use std::path::Path; | |
6 | ||
7 | pub mod cache; | |
8 | pub mod config; | |
9 | pub mod custom_detectors; | |
10 | pub mod detector_factory; | |
11 | pub mod detectors; | |
12 | pub mod distributed; | |
13 | pub mod enhanced_config; | |
14 | pub mod incremental; | |
15 | pub mod llm_detectors; | |
16 | pub mod monitoring; | |
17 | pub mod optimized_scanner; | |
18 | pub mod performance; | |
19 | ||
/// Represents a detected pattern match in a file.
// Derives Hash/Eq so matches can be deduplicated in sets and maps; Serialize/
// Deserialize so scan results can be persisted and exchanged as JSON.
#[derive(Debug, Clone, PartialEq, Eq, Hash, serde::Serialize, serde::Deserialize)]
pub struct Match {
    /// The path to the file where the match was found.
    pub file_path: String,
    /// The line number (1-based) where the match starts.
    pub line_number: usize,
    /// The column number (1-based) where the match starts.
    pub column: usize,
    /// The type of pattern detected (e.g., "TODO", "FIXME").
    pub pattern: String,
    /// The matched text or a descriptive message.
    pub message: String,
}
34 | ||
/// Severity levels for detected patterns.
// Variants are ordered from least to most severe.
#[derive(Debug, Clone, PartialEq, Eq, Hash, serde::Serialize, serde::Deserialize)]
pub enum Severity {
    /// Purely informational finding.
    Info,
    /// Minor issue; safe to defer.
    Low,
    /// Should be addressed in normal course of work.
    Medium,
    /// Likely defect or risk; address soon.
    High,
    /// Must be fixed before release.
    Critical,
}
44 | ||
/// Trait for detecting patterns in code content.
/// Implementors should define how to find specific patterns like TODO or FIXME.
// `Send + Sync` is required because detectors are shared across rayon worker
// threads during parallel scanning.
pub trait PatternDetector: Send + Sync {
    /// Detects patterns in the given content and returns a list of matches.
    /// The file_path is provided for context, such as filtering by file type.
    fn detect(&self, content: &str, file_path: &Path) -> Vec<Match>;
}
52 | ||
/// A scanner that uses parallel processing to scan codebases for patterns.
pub struct Scanner {
    // Detector set applied to every scanned file.
    detectors: Vec<Box<dyn PatternDetector>>,
    // Per-file result cache keyed by path string; DashMap allows concurrent
    // access from rayon workers without an external lock.
    cache: DashMap<String, Vec<Match>>,
}
58 | ||
59 | impl Scanner { | |
60 | /// Creates a new scanner with the given pattern detectors. | |
61 | 15 | pub fn new(detectors: Vec<Box<dyn PatternDetector>>) -> Self { |
62 | 15 | Self { |
63 | 15 | detectors, |
64 | 15 | cache: DashMap::new(), |
65 | 15 | } |
66 | 15 | } |
67 | ||
68 | /// Scans the directory tree starting from the given root path. | |
69 | /// Returns all matches found by the detectors. | |
70 | /// Uses parallel processing for performance with improved load balancing and caching. | |
71 | /// | |
72 | /// # Examples | |
73 | /// | |
74 | /// ``` | |
75 | /// use code_guardian_core::{Scanner, PatternDetector, Match}; | |
76 | /// use std::path::Path; | |
77 | /// | |
78 | /// struct MockDetector; | |
79 | /// impl PatternDetector for MockDetector { | |
80 | /// fn detect(&self, content: &str, _file_path: &Path) -> Vec<Match> { | |
81 | /// if content.contains("TODO") { | |
82 | /// vec![Match { | |
83 | /// file_path: "test.rs".to_string(), | |
84 | /// line_number: 1, | |
85 | /// column: 1, | |
86 | /// pattern: "TODO".to_string(), | |
87 | /// message: "TODO found".to_string(), | |
88 | /// }] | |
89 | /// } else { | |
90 | /// vec![] | |
91 | /// } | |
92 | /// } | |
93 | /// } | |
94 | /// | |
95 | /// let scanner = Scanner::new(vec![Box::new(MockDetector)]); | |
96 | /// // Note: This would scan actual files; in doctest, we can't create temp files easily | |
97 | /// ``` | |
98 | 15 | pub fn scan(&self, root: &Path) -> Result<Vec<Match>> { |
99 | 15 | let matches: Vec<Match> = WalkBuilder::new(root) |
100 | 15 | .build() |
101 | 15 | .par_bridge() |
102 | 45 | . |
103 | 45 | let |
104 | 43 | let file_type = entry.file_type() |
105 | 43 | if file_type.is_file() { |
106 | 30 | let path = entry.path(); |
107 | 30 | let path_str = path.to_string_lossy().to_string(); |
108 | 30 | if let Some( |
109 | 0 | Some(cached.clone()) |
110 | } else { | |
111 | 30 | let |
112 | 15 | let file_matches: Vec<Match> = self |
113 | 15 | .detectors |
114 | 15 | .par_iter() |
115 | 60 | . |
116 | 15 | .collect(); |
117 | 15 | self.cache.insert(path_str, file_matches.clone()); |
118 | 15 | Some(file_matches) |
119 | } | |
120 | } else { | |
121 | 13 | None |
122 | } | |
123 | 45 | }) |
124 | 15 | .flatten() |
125 | 15 | .collect(); |
126 | ||
127 | 15 | Ok(matches) |
128 | 15 | } |
129 | } | |
130 | ||
131 | // Re-export detectors and factory for convenience | |
132 | pub use cache::*; | |
133 | pub use custom_detectors::*; | |
134 | pub use detector_factory::*; | |
135 | pub use detectors::*; | |
136 | pub use distributed::*; | |
137 | pub use enhanced_config::*; | |
138 | pub use incremental::*; | |
139 | pub use llm_detectors::*; | |
140 | pub use monitoring::*; | |
141 | pub use optimized_scanner::*; | |
142 | pub use performance::*; | |
143 | ||
144 | #[cfg(test)] | |
145 | mod tests { | |
146 | use super::*; | |
147 | use std::path::PathBuf; | |
148 | ||
149 | #[test] | |
150 | fn test_todo_detector() { | |
151 | let detector = TodoDetector; | |
152 | let content = "Some code\n// TODO: fix this\nMore code"; | |
153 | let path = PathBuf::from("test.rs"); | |
154 | let matches = detector.detect(content, &path); | |
155 | assert_eq!(matches.len(), 1); | |
156 | assert_eq!(matches[0].pattern, "TODO"); | |
157 | assert_eq!(matches[0].line_number, 2); | |
158 | assert_eq!(matches[0].column, 4); // "// " is 3 chars, then TODO | |
159 | assert!(matches[0].message.contains("TODO")); | |
160 | } | |
161 | ||
162 | #[test] | |
163 | fn test_fixme_detector() { | |
164 | let detector = FixmeDetector; | |
165 | let content = "Code\nFIXME: issue here\nEnd"; | |
166 | let path = PathBuf::from("test.js"); | |
167 | let matches = detector.detect(content, &path); | |
168 | assert_eq!(matches.len(), 1); | |
169 | assert_eq!(matches[0].pattern, "FIXME"); | |
170 | assert_eq!(matches[0].line_number, 2); | |
171 | assert_eq!(matches[0].column, 1); | |
172 | assert!(matches[0].message.contains("FIXME")); | |
173 | } | |
174 | ||
175 | #[test] | |
176 | fn test_no_matches() { | |
177 | let detector = TodoDetector; | |
178 | let content = "No todos here"; | |
179 | let path = PathBuf::from("test.txt"); | |
180 | let matches = detector.detect(content, &path); | |
181 | assert_eq!(matches.len(), 0); | |
182 | } | |
183 | ||
184 | #[test] | |
185 | fn test_multiple_matches() { | |
186 | let detector = TodoDetector; | |
187 | let content = "TODO\n// TODO again"; | |
188 | let path = PathBuf::from("test.rs"); | |
189 | let matches = detector.detect(content, &path); | |
190 | assert_eq!(matches.len(), 2); | |
191 | } | |
192 | ||
193 | #[test] | |
194 | fn test_scanner_with_detectors() { | |
195 | let detectors: Vec<Box<dyn PatternDetector>> = | |
196 | vec![Box::new(TodoDetector), Box::new(FixmeDetector)]; | |
197 | let scanner = Scanner::new(detectors); | |
198 | // For testing, we can create a temp dir, but for simplicity, assume a test file exists. | |
199 | // Since it's hard to create files in test, perhaps mock or use a known path. | |
200 | // For now, skip integration test or use a string-based approach. | |
201 | // Actually, since scan reads files, for unit test, perhaps test the logic separately. | |
202 | // But to have coverage, perhaps create a temp file in test. | |
203 | use tempfile::TempDir; | |
204 | let temp_dir = TempDir::new().unwrap(); | |
205 | let file_path = temp_dir.path().join("test.rs"); | |
206 | std::fs::write(&file_path, "TODO: test\nFIXME: another").unwrap(); | |
207 | let matches = scanner.scan(temp_dir.path()).unwrap(); | |
208 | assert_eq!(matches.len(), 2); | |
209 | // Sort by pattern for deterministic test | |
210 | let mut sorted = matches; | |
211 | sorted.sort_by(|a, b| a.pattern.cmp(&b.pattern)); | |
212 | assert_eq!(sorted[0].pattern, "FIXME"); | |
213 | assert_eq!(sorted[1].pattern, "TODO"); | |
214 | } | |
215 | ||
216 | #[test] | |
217 | fn test_production_readiness_multi_language_scan() { | |
218 | use tempfile::TempDir; | |
219 | ||
220 | let temp_dir = TempDir::new().unwrap(); | |
221 | ||
222 | // Create test files with non-production code in different languages | |
223 | ||
224 | // JavaScript with console.log and debugger | |
225 | std::fs::write(temp_dir.path().join("app.js"), | |
226 | "function login(user) {\n console.log('User:', user);\n debugger;\n return true;\n}") | |
227 | .unwrap(); | |
228 | ||
229 | // TypeScript with alert | |
230 | std::fs::write( | |
231 | temp_dir.path().join("utils.ts"), | |
232 | "export function debug() {\n alert('Debug mode');\n // Phase 1 implementation\n}", | |
233 | ) | |
234 | .unwrap(); | |
235 | ||
236 | // Python with print and experimental code | |
237 | std::fs::write(temp_dir.path().join("main.py"), | |
238 | "def process_data():\n print('Processing...') # dev output\n # experimental algorithm\n pass") | |
239 | .unwrap(); | |
240 | ||
241 | // Rust with println! and unwrap | |
242 | std::fs::write(temp_dir.path().join("lib.rs"), | |
243 | "fn main() {\n println!(\"Debug info\");\n // TODO: remove debug\n let value = result.unwrap();\n}") | |
244 | .unwrap(); | |
245 | ||
246 | // Create production-ready detectors | |
247 | let detectors = crate::DetectorFactory::create_production_ready_detectors(); | |
248 | let scanner = Scanner::new(detectors); | |
249 | ||
250 | // Scan the test directory | |
251 | let matches = scanner.scan(temp_dir.path()).unwrap(); | |
252 | ||
253 | // Verify we found issues across languages | |
254 | assert!( | |
255 | matches.len() >= 6, | |
256 | "Should find multiple non-production patterns, found: {}", | |
257 | matches.len() | |
258 | ); | |
259 | ||
260 | // Verify specific patterns were detected across languages | |
261 | let patterns: Vec<&str> = matches.iter().map(|m| m.pattern.as_str()).collect(); | |
262 | ||
263 | // Verify critical non-production patterns are detected | |
264 | assert!( | |
265 | patterns.contains(&"CONSOLE_LOG"), | |
266 | "Should detect console.log in JavaScript" | |
267 | ); | |
268 | assert!( | |
269 | patterns.contains(&"DEBUGGER"), | |
270 | "Should detect debugger statements" | |
271 | ); | |
272 | assert!( | |
273 | patterns.contains(&"ALERT"), | |
274 | "Should detect alert in TypeScript" | |
275 | ); | |
276 | assert!( | |
277 | patterns.contains(&"PRINT"), | |
278 | "Should detect print statements" | |
279 | ); | |
280 | assert!( | |
281 | patterns.contains(&"DEV"), | |
282 | "Should detect dev environment references" | |
283 | ); | |
284 | assert!( | |
285 | patterns.contains(&"EXPERIMENTAL"), | |
286 | "Should detect experimental code" | |
287 | ); | |
288 | assert!(patterns.contains(&"PHASE"), "Should detect phase markers"); | |
289 | assert!( | |
290 | patterns.contains(&"UNWRAP"), | |
291 | "Should detect Rust unwrap calls" | |
292 | ); | |
293 | ||
294 | println!( | |
295 | "โ
Production readiness scan found {} issues across multiple languages", | |
296 | matches.len() | |
297 | ); | |
298 | for m in &matches { | |
299 | println!(" {} [{}] {}", m.file_path, m.pattern, m.message); | |
300 | } | |
301 | } | |
302 | } |
Line | Count | Source |
1 | use crate::{Match, PatternDetector}; | |
2 | use lazy_static::lazy_static; | |
3 | use regex::Regex; | |
4 | use std::path::Path; | |
5 | ||
6 | lazy_static! { | |
7 | // Hallucinated API patterns - APIs that LLMs commonly generate but don't exist | |
8 | pub static ref HALLUCINATED_API_REGEX: Regex = Regex::new( | |
9 | r"(?i)\.(authenticate|validateInput|sanitize|encryptData|hashPassword|secureRandom|generateToken|verifySignature|encodeBase64|decodeBase64|compressData|decompressData|validateEmail|validatePhone|formatCurrency|parseJson|serializeJson)\s*\(\s*\)" | |
10 | ).unwrap(); | |
11 | ||
12 | pub static ref INCOMPLETE_API_REGEX: Regex = Regex::new( | |
13 | r"(?:jwt\.sign\([^,)]*\)$|bcrypt\.hash\([^,)]*\)$|crypto\.createHash\([^)]*\)\.update\([^)]*\)$)" | |
14 | ).unwrap(); | |
15 | ||
16 | // SQL Injection patterns commonly generated by LLMs | |
17 | pub static ref SQL_INJECTION_REGEX: Regex = Regex::new( | |
18 | r#"(?i)["'](?:SELECT|INSERT|UPDATE|DELETE)\s+.*["']\s*\+|query\s*\(\s*[^?].*\+|\$\{[^}]*\}.*(?:SELECT|INSERT)"# | |
19 | ).unwrap(); | |
20 | ||
21 | // Insecure random patterns | |
22 | pub static ref INSECURE_RANDOM_REGEX: Regex = Regex::new( | |
23 | r"(?:Math\.random\(\)|Random\(\)\.nextInt|rand\(\)|random\.randint).*(?:password|token|key|secret|salt)" | |
24 | ).unwrap(); | |
25 | ||
26 | // Hardcoded credentials patterns | |
27 | pub static ref HARDCODED_CREDENTIALS_REGEX: Regex = Regex::new( | |
28 | r#"(?i)(?:password|api_key|secret|token|private_key)\s*[:=]\s*["'][^"']{8,}["']"# | |
29 | ).unwrap(); | |
30 | ||
31 | // Memory safety issues in Rust (LLM-specific patterns) | |
32 | pub static ref RUST_MEMORY_SAFETY_REGEX: Regex = Regex::new( | |
33 | r"(?:\.unwrap\(\)\s*;?\s*//.*safe|unsafe\s*\{[^}]*\}\s*//.*safe|transmute\s*\(|Box::from_raw\s*\()" | |
34 | ).unwrap(); | |
35 | ||
36 | // Async/await anti-patterns | |
37 | pub static ref ASYNC_ANTIPATTERN_REGEX: Regex = Regex::new( | |
38 | r"(?:await\s+\w+\s*;|\.then\(\s*await|return\s+await\s+Promise\.resolve|Promise\.all\([^)]*\)\s*;)" | |
39 | ).unwrap(); | |
40 | ||
41 | // Performance anti-patterns | |
42 | pub static ref PERFORMANCE_ANTIPATTERN_REGEX: Regex = Regex::new( | |
43 | r"(?:for.*for.*for.*for|\.sort\(\).*\.sort\(\)|\.clone\(\)\.clone\(\)|Vec::new\(\).*\.push.*for.*in)" | |
44 | ).unwrap(); | |
45 | ||
46 | // Error handling issues | |
47 | pub static ref ERROR_HANDLING_REGEX: Regex = Regex::new( | |
48 | r"(?:catch\s*\([^)]*\)\s*\{\s*\}|except\s*[^:]*:\s*pass|\.map_err\(.*\)\.unwrap\(\)|panic!\(.*result)" | |
49 | ).unwrap(); | |
50 | ||
51 | // Cryptographic anti-patterns | |
52 | pub static ref CRYPTO_ANTIPATTERN_REGEX: Regex = Regex::new( | |
53 | r"(?i)(?:MD5|SHA1|DES|RC4)\s*\(|AES.*ECB|new\s+Random\(\).*(?:key|salt)" | |
54 | ).unwrap(); | |
55 | ||
56 | // Over-engineering patterns | |
57 | pub static ref OVERENGINEERING_REGEX: Regex = Regex::new( | |
58 | r"(?:class.*Factory.*Factory|AbstractFactoryBuilder|\.map\(.*\)\.map\(.*\)\.map\()" | |
59 | ).unwrap(); | |
60 | ||
61 | // XSS and injection vulnerabilities | |
62 | pub static ref XSS_INJECTION_REGEX: Regex = Regex::new( | |
63 | r"(?:innerHTML\s*=.*\+|document\.write\(.*\+|eval\(.*request\.|exec\(.*input.*\))" | |
64 | ).unwrap(); | |
65 | ||
66 | // File system security issues | |
67 | pub static ref FILESYSTEM_SECURITY_REGEX: Regex = Regex::new( | |
68 | r"(?:open\(.*\+.*['\x22].*w|File\(.*\+.*\)|\.\.\/.*\.\.\/|Path\.join\(.*input)" | |
69 | ).unwrap(); | |
70 | ||
71 | // Configuration anti-patterns | |
72 | pub static ref CONFIG_ANTIPATTERN_REGEX: Regex = Regex::new( | |
73 | r"(?:localhost:\d+|127\.0\.0\.1:\d+|http://[^\x22]*[\x22]|port.*=.*\d{4,5})" | |
74 | ).unwrap(); | |
75 | ||
76 | // JavaScript-specific LLM issues | |
77 | pub static ref JS_LLM_ISSUES_REGEX: Regex = Regex::new( | |
78 | r"(?:==.*null|!=.*undefined|JSON\.parse\([^)]*\)\s*;|parseInt\([^,)]*\))" | |
79 | ).unwrap(); | |
80 | ||
81 | // Python-specific LLM issues | |
82 | pub static ref PYTHON_LLM_ISSUES_REGEX: Regex = Regex::new( | |
83 | r"(?:exec\(.*input\(|eval\(.*input\(|pickle\.loads\(.*request|__import__\(.*input)" | |
84 | ).unwrap(); | |
85 | ||
86 | // Context confusion patterns | |
87 | pub static ref CONTEXT_CONFUSION_REGEX: Regex = Regex::new( | |
88 | r"(?:sudo.*\|\|.*su\s|system\(.*\+.*\)|process\.env\..*\|\|.*[\x22])" | |
89 | ).unwrap(); | |
90 | ||
91 | // Database anti-patterns | |
92 | pub static ref DATABASE_ANTIPATTERN_REGEX: Regex = Regex::new( | |
93 | r"(?i)(?:SELECT \* FROM|for.*in.*\.execute\(|WHERE.*LIKE\s*\x27%.*%\x27)" | |
94 | ).unwrap(); | |
95 | ||
96 | // LLM-specific comment patterns that indicate AI generation | |
97 | pub static ref LLM_GENERATED_COMMENTS_REGEX: Regex = Regex::new( | |
98 | r"(?i)//.*(?:ai generated|generated by|gpt|claude|chatgpt|copilot|based on|as an ai|llm|machine learning|neural network|deep learning|transformer|attention mechanism)" | |
99 | ).unwrap(); | |
100 | ||
101 | // AI model hallucinated patterns - common incorrect implementations | |
102 | pub static ref AI_MODEL_HALLUCINATION_REGEX: Regex = Regex::new( | |
103 | r"(?i)(?:tensorflow\.keras|torch\.nn\.Module|sklearn\.model_selection\.GridSearchCV|transformers\.pipeline)\s*\(\s*['\x22][^'\x22]*['\x22]\s*\)\s*\.\s*(fit|predict|train|evaluate)\s*\(\s*\)" | |
104 | ).unwrap(); | |
105 | ||
106 | // Incorrect async patterns commonly generated by LLMs | |
107 | pub static ref INCORRECT_ASYNC_REGEX: Regex = Regex::new( | |
108 | r"(?:async\s+function\s+\w+\s*\([^)]*\)\s*\{\s*return\s+await\s+Promise\.resolve\([^;]*\);\s*\}|await\s+\w+\s*\([^)]*\)\s*;?\s*//.*blocking|Promise\.all\([^)]*\)\s*\.\s*then\s*\([^)]*\)\s*await)" | |
109 | ).unwrap(); | |
110 | ||
111 | // Common LLM-generated security anti-patterns | |
112 | pub static ref LLM_SECURITY_ANTIPATTERN_REGEX: Regex = Regex::new( | |
113 | r"(?i)(?:eval\s*\([^)]*req\.|Function\s*\([^)]*req\.|setTimeout\s*\([^)]*req\.|setInterval\s*\([^)]*req\.|innerHTML\s*=.*req\.|outerHTML\s*=.*req\.|document\.write\s*\([^)]*req\.|window\.location\s*=.*req\.|localStorage\.setItem\s*\([^,)]*,\s*req\.|sessionStorage\.setItem\s*\([^,)]*,\s*req\.)" | |
114 | ).unwrap(); | |
115 | ||
116 | // LLM-generated database anti-patterns | |
117 | pub static ref LLM_DB_ANTIPATTERN_REGEX: Regex = Regex::new( | |
118 | r"(?i)(?:SELECT\s+\*\s+FROM\s+\w+\s+WHERE\s+.*=.*\+|INSERT\s+INTO\s+\w+\s+VALUES\s*\([^)]*\+|UPDATE\s+\w+\s+SET\s+.*=.*\+|DELETE\s+FROM\s+\w+\s+WHERE\s+.*=.*\+)" | |
119 | ).unwrap(); | |
120 | ||
121 | // Common LLM-generated error handling mistakes | |
122 | pub static ref LLM_ERROR_HANDLING_MISTAKES_REGEX: Regex = Regex::new( | |
123 | r"(?:try\s*\{\s*[^}]*\}\s*catch\s*\([^)]*\)\s*\{\s*\}\s*//.*ignore|catch\s*\([^)]*\)\s*\{\s*console\.log\s*\([^)]*\)\s*\}\s*//.*log|throw\s+new\s+Error\s*\([^)]*\)\s*;?\s*//.*generic|\.catch\s*\([^)]*\)\s*=>\s*\{\s*\}\s*//.*empty)" | |
124 | ).unwrap(); | |
125 | ||
126 | // LLM-generated performance issues | |
127 | pub static ref LLM_PERFORMANCE_MISTAKES_REGEX: Regex = Regex::new( | |
128 | r"(?:for\s*\([^)]*\)\s*\{\s*[^}]*for\s*\([^)]*\)\s*\{\s*[^}]*for\s*\([^)]*\)\s*\{\s*[^}]*\}\s*\}\s*\}\s*//.*nested|Array\.from\s*\([^)]*\)\s*\.\s*map\s*\([^)]*\)\s*\.\s*filter\s*\([^)]*\)\s*\.\s*reduce\s*\([^)]*\)\s*//.*chain|\.sort\s*\([^)]*\)\s*\.\s*reverse\s*\([^)]*\)\s*//.*inefficient)" | |
129 | ).unwrap(); | |
130 | ||
131 | // LLM-generated incorrect type handling | |
132 | pub static ref LLM_TYPE_MISTAKES_REGEX: Regex = Regex::new( | |
133 | r"(?:let\s+\w+\s*:\s*any\s*=\s*[^;]*;?\s*//.*type|var\s+\w+\s*=\s*[^;]*;?\s*//.*untyped|const\s+\w+\s*=\s*null\s*;?\s*//.*nullable|function\s+\w+\s*\([^)]*\)\s*:\s*any\s*\{[^}]*\}\s*//.*return)" | |
134 | ).unwrap(); | |
135 | } | |
136 | ||
137 | 0 | fn detect_pattern_with_context( |
138 | 0 | content: &str, |
139 | 0 | file_path: &Path, |
140 | 0 | pattern_name: &str, |
141 | 0 | re: &Regex, |
142 | 0 | ) -> Vec<Match> { |
143 | 0 | let mut matches = Vec::new(); |
144 | 0 | for (line_idx, line) in content.lines().enumerate() { |
145 | 0 | for mat in re.find_iter(line) { |
146 | 0 | let context_start = mat.start().saturating_sub(15); |
147 | 0 | let context_end = (mat.end() + 25).min(line.len()); |
148 | 0 | let context = &line[context_start..context_end]; |
149 | 0 | |
150 | 0 | matches.push(Match { |
151 | 0 | file_path: file_path.to_string_lossy().to_string(), |
152 | 0 | line_number: line_idx + 1, |
153 | 0 | column: mat.start() + 1, |
154 | 0 | pattern: pattern_name.to_string(), |
155 | 0 | message: format!("{}: {}", pattern_name, context.trim()), |
156 | 0 | }); |
157 | 0 | } |
158 | } | |
159 | 0 | matches |
160 | 0 | } |
161 | ||
162 | /// Detector for hallucinated APIs commonly generated by LLMs | |
163 | pub struct HallucinatedApiDetector; | |
164 | ||
165 | impl PatternDetector for HallucinatedApiDetector { | |
166 | 0 | fn detect(&self, content: &str, file_path: &Path) -> Vec<Match> { |
167 | 0 | let mut matches = detect_pattern_with_context( |
168 | 0 | content, |
169 | 0 | file_path, |
170 | 0 | "LLM_HALLUCINATED_API", |
171 | 0 | &HALLUCINATED_API_REGEX, |
172 | ); | |
173 | ||
174 | 0 | matches.extend(detect_pattern_with_context( |
175 | 0 | content, |
176 | 0 | file_path, |
177 | 0 | "LLM_INCOMPLETE_API", |
178 | 0 | &INCOMPLETE_API_REGEX, |
179 | )); | |
180 | ||
181 | 0 | matches |
182 | 0 | } |
183 | } | |
184 | ||
185 | /// Detector for SQL injection vulnerabilities common in LLM-generated code | |
186 | pub struct LLMSQLInjectionDetector; | |
187 | ||
188 | impl PatternDetector for LLMSQLInjectionDetector { | |
189 | 0 | fn detect(&self, content: &str, file_path: &Path) -> Vec<Match> { |
190 | 0 | detect_pattern_with_context( |
191 | 0 | content, |
192 | 0 | file_path, |
193 | 0 | "LLM_SQL_INJECTION", |
194 | 0 | &SQL_INJECTION_REGEX, |
195 | ) | |
196 | 0 | } |
197 | } | |
198 | ||
199 | /// Detector for insecure random number generation in security contexts | |
200 | pub struct InsecureRandomDetector; | |
201 | ||
202 | impl PatternDetector for InsecureRandomDetector { | |
203 | 0 | fn detect(&self, content: &str, file_path: &Path) -> Vec<Match> { |
204 | 0 | detect_pattern_with_context( |
205 | 0 | content, |
206 | 0 | file_path, |
207 | 0 | "LLM_INSECURE_RANDOM", |
208 | 0 | &INSECURE_RANDOM_REGEX, |
209 | ) | |
210 | 0 | } |
211 | } | |
212 | ||
213 | /// Detector for hardcoded credentials in LLM-generated code | |
214 | pub struct HardcodedCredentialsDetector; | |
215 | ||
216 | impl PatternDetector for HardcodedCredentialsDetector { | |
217 | 0 | fn detect(&self, content: &str, file_path: &Path) -> Vec<Match> { |
218 | 0 | detect_pattern_with_context( |
219 | 0 | content, |
220 | 0 | file_path, |
221 | 0 | "LLM_HARDCODED_CREDENTIALS", |
222 | 0 | &HARDCODED_CREDENTIALS_REGEX, |
223 | ) | |
224 | 0 | } |
225 | } | |
226 | ||
227 | /// Detector for memory safety issues in Rust code generated by LLMs | |
228 | pub struct RustMemorySafetyDetector; | |
229 | ||
230 | impl PatternDetector for RustMemorySafetyDetector { | |
231 | 0 | fn detect(&self, content: &str, file_path: &Path) -> Vec<Match> { |
232 | 0 | if let Some(ext) = file_path.extension() { |
233 | 0 | if ext == "rs" { |
234 | 0 | return detect_pattern_with_context( |
235 | 0 | content, |
236 | 0 | file_path, |
237 | 0 | "LLM_RUST_MEMORY_SAFETY", |
238 | 0 | &RUST_MEMORY_SAFETY_REGEX, |
239 | ); | |
240 | 0 | } |
241 | 0 | } |
242 | 0 | Vec::new() |
243 | 0 | } |
244 | } | |
245 | ||
246 | /// Detector for async/await anti-patterns | |
247 | pub struct AsyncAntipatternDetector; | |
248 | ||
249 | impl PatternDetector for AsyncAntipatternDetector { | |
250 | 0 | fn detect(&self, content: &str, file_path: &Path) -> Vec<Match> { |
251 | 0 | if let Some(ext) = file_path.extension() { |
252 | 0 | let ext_str = ext.to_string_lossy(); |
253 | 0 | if matches!(ext_str.as_ref(), "js" | "ts" | "jsx" | "tsx" | "rs") { |
254 | 0 | return detect_pattern_with_context( |
255 | 0 | content, |
256 | 0 | file_path, |
257 | 0 | "LLM_ASYNC_ANTIPATTERN", |
258 | 0 | &ASYNC_ANTIPATTERN_REGEX, |
259 | ); | |
260 | 0 | } |
261 | 0 | } |
262 | 0 | Vec::new() |
263 | 0 | } |
264 | } | |
265 | ||
266 | /// Detector for performance anti-patterns in LLM code | |
267 | pub struct PerformanceAntipatternDetector; | |
268 | ||
269 | impl PatternDetector for PerformanceAntipatternDetector { | |
270 | 0 | fn detect(&self, content: &str, file_path: &Path) -> Vec<Match> { |
271 | 0 | detect_pattern_with_context( |
272 | 0 | content, |
273 | 0 | file_path, |
274 | 0 | "LLM_PERFORMANCE_ISSUE", |
275 | 0 | &PERFORMANCE_ANTIPATTERN_REGEX, |
276 | ) | |
277 | 0 | } |
278 | } | |
279 | ||
280 | /// Detector for poor error handling patterns | |
281 | pub struct ErrorHandlingDetector; | |
282 | ||
283 | impl PatternDetector for ErrorHandlingDetector { | |
284 | 0 | fn detect(&self, content: &str, file_path: &Path) -> Vec<Match> { |
285 | 0 | detect_pattern_with_context( |
286 | 0 | content, |
287 | 0 | file_path, |
288 | 0 | "LLM_ERROR_HANDLING", |
289 | 0 | &ERROR_HANDLING_REGEX, |
290 | ) | |
291 | 0 | } |
292 | } | |
293 | ||
294 | /// Detector for cryptographic anti-patterns | |
295 | pub struct CryptoAntipatternDetector; | |
296 | ||
297 | impl PatternDetector for CryptoAntipatternDetector { | |
298 | 0 | fn detect(&self, content: &str, file_path: &Path) -> Vec<Match> { |
299 | 0 | detect_pattern_with_context( |
300 | 0 | content, |
301 | 0 | file_path, |
302 | 0 | "LLM_CRYPTO_ANTIPATTERN", |
303 | 0 | &CRYPTO_ANTIPATTERN_REGEX, |
304 | ) | |
305 | 0 | } |
306 | } | |
307 | ||
308 | /// Detector for over-engineering patterns | |
309 | pub struct OverengineeringDetector; | |
310 | ||
311 | impl PatternDetector for OverengineeringDetector { | |
312 | 0 | fn detect(&self, content: &str, file_path: &Path) -> Vec<Match> { |
313 | 0 | detect_pattern_with_context( |
314 | 0 | content, |
315 | 0 | file_path, |
316 | 0 | "LLM_OVERENGINEERING", |
317 | 0 | &OVERENGINEERING_REGEX, |
318 | ) | |
319 | 0 | } |
320 | } | |
321 | ||
322 | /// Detector for XSS and code injection vulnerabilities | |
323 | pub struct XSSInjectionDetector; | |
324 | ||
325 | impl PatternDetector for XSSInjectionDetector { | |
326 | 0 | fn detect(&self, content: &str, file_path: &Path) -> Vec<Match> { |
327 | 0 | detect_pattern_with_context( |
328 | 0 | content, |
329 | 0 | file_path, |
330 | 0 | "LLM_XSS_INJECTION", |
331 | 0 | &XSS_INJECTION_REGEX, |
332 | ) | |
333 | 0 | } |
334 | } | |
335 | ||
336 | /// Detector for file system security issues | |
337 | pub struct FilesystemSecurityDetector; | |
338 | ||
339 | impl PatternDetector for FilesystemSecurityDetector { | |
340 | 0 | fn detect(&self, content: &str, file_path: &Path) -> Vec<Match> { |
341 | 0 | detect_pattern_with_context( |
342 | 0 | content, |
343 | 0 | file_path, |
344 | 0 | "LLM_FILESYSTEM_SECURITY", |
345 | 0 | &FILESYSTEM_SECURITY_REGEX, |
346 | ) | |
347 | 0 | } |
348 | } | |
349 | ||
350 | /// Detector for configuration anti-patterns | |
351 | pub struct ConfigAntipatternDetector; | |
352 | ||
353 | impl PatternDetector for ConfigAntipatternDetector { | |
354 | 0 | fn detect(&self, content: &str, file_path: &Path) -> Vec<Match> { |
355 | 0 | detect_pattern_with_context( |
356 | 0 | content, |
357 | 0 | file_path, |
358 | 0 | "LLM_CONFIG_ANTIPATTERN", |
359 | 0 | &CONFIG_ANTIPATTERN_REGEX, |
360 | ) | |
361 | 0 | } |
362 | } | |
363 | ||
364 | /// Detector for JavaScript-specific LLM issues | |
365 | pub struct JSLLMIssuesDetector; | |
366 | ||
367 | impl PatternDetector for JSLLMIssuesDetector { | |
368 | 0 | fn detect(&self, content: &str, file_path: &Path) -> Vec<Match> { |
369 | 0 | if let Some(ext) = file_path.extension() { |
370 | 0 | let ext_str = ext.to_string_lossy(); |
371 | 0 | if matches!( |
372 | 0 | ext_str.as_ref(), |
373 | 0 | "js" | "ts" | "jsx" | "tsx" | "vue" | "svelte" |
374 | ) { | |
375 | 0 | return detect_pattern_with_context( |
376 | 0 | content, |
377 | 0 | file_path, |
378 | 0 | "LLM_JS_ISSUES", |
379 | 0 | &JS_LLM_ISSUES_REGEX, |
380 | ); | |
381 | 0 | } |
382 | 0 | } |
383 | 0 | Vec::new() |
384 | 0 | } |
385 | } | |
386 | ||
387 | /// Detector for Python-specific LLM issues | |
388 | pub struct PythonLLMIssuesDetector; | |
389 | ||
390 | impl PatternDetector for PythonLLMIssuesDetector { | |
391 | 0 | fn detect(&self, content: &str, file_path: &Path) -> Vec<Match> { |
392 | 0 | if let Some(ext) = file_path.extension() { |
393 | 0 | if ext == "py" { |
394 | 0 | return detect_pattern_with_context( |
395 | 0 | content, |
396 | 0 | file_path, |
397 | 0 | "LLM_PYTHON_ISSUES", |
398 | 0 | &PYTHON_LLM_ISSUES_REGEX, |
399 | ); | |
400 | 0 | } |
401 | 0 | } |
402 | 0 | Vec::new() |
403 | 0 | } |
404 | } | |
405 | ||
406 | /// Detector for security context confusion | |
407 | pub struct ContextConfusionDetector; | |
408 | ||
409 | impl PatternDetector for ContextConfusionDetector { | |
410 | 0 | fn detect(&self, content: &str, file_path: &Path) -> Vec<Match> { |
411 | 0 | detect_pattern_with_context( |
412 | 0 | content, |
413 | 0 | file_path, |
414 | 0 | "LLM_CONTEXT_CONFUSION", |
415 | 0 | &CONTEXT_CONFUSION_REGEX, |
416 | ) | |
417 | 0 | } |
418 | } | |
419 | ||
420 | /// Detector for database anti-patterns | |
421 | pub struct DatabaseAntipatternDetector; | |
422 | ||
423 | impl PatternDetector for DatabaseAntipatternDetector { | |
424 | 0 | fn detect(&self, content: &str, file_path: &Path) -> Vec<Match> { |
425 | 0 | detect_pattern_with_context( |
426 | 0 | content, |
427 | 0 | file_path, |
428 | 0 | "LLM_DATABASE_ANTIPATTERN", |
429 | 0 | &DATABASE_ANTIPATTERN_REGEX, |
430 | ) | |
431 | 0 | } |
432 | } | |
433 | ||
434 | /// Detector for comments indicating LLM-generated code | |
435 | pub struct LLMGeneratedCommentsDetector; | |
436 | ||
437 | impl PatternDetector for LLMGeneratedCommentsDetector { | |
438 | 0 | fn detect(&self, content: &str, file_path: &Path) -> Vec<Match> { |
439 | 0 | detect_pattern_with_context( |
440 | 0 | content, |
441 | 0 | file_path, |
442 | 0 | "LLM_GENERATED_COMMENT", |
443 | 0 | &LLM_GENERATED_COMMENTS_REGEX, |
444 | ) | |
445 | 0 | } |
446 | } | |
447 | ||
448 | /// Detector for AI model hallucinated patterns | |
449 | pub struct AIModelHallucinationDetector; | |
450 | ||
451 | impl PatternDetector for AIModelHallucinationDetector { | |
452 | 0 | fn detect(&self, content: &str, file_path: &Path) -> Vec<Match> { |
453 | 0 | detect_pattern_with_context( |
454 | 0 | content, |
455 | 0 | file_path, |
456 | 0 | "LLM_AI_MODEL_HALLUCINATION", |
457 | 0 | &AI_MODEL_HALLUCINATION_REGEX, |
458 | ) | |
459 | 0 | } |
460 | } | |
461 | ||
462 | /// Detector for incorrect async patterns | |
463 | pub struct IncorrectAsyncDetector; | |
464 | ||
465 | impl PatternDetector for IncorrectAsyncDetector { | |
466 | 0 | fn detect(&self, content: &str, file_path: &Path) -> Vec<Match> { |
467 | 0 | detect_pattern_with_context( |
468 | 0 | content, |
469 | 0 | file_path, |
470 | 0 | "LLM_INCORRECT_ASYNC", |
471 | 0 | &INCORRECT_ASYNC_REGEX, |
472 | ) | |
473 | 0 | } |
474 | } | |
475 | ||
476 | /// Detector for LLM-generated security anti-patterns | |
477 | pub struct LLMSecurityAntipatternDetector; | |
478 | ||
479 | impl PatternDetector for LLMSecurityAntipatternDetector { | |
480 | 0 | fn detect(&self, content: &str, file_path: &Path) -> Vec<Match> { |
481 | 0 | detect_pattern_with_context( |
482 | 0 | content, |
483 | 0 | file_path, |
484 | 0 | "LLM_SECURITY_ANTIPATTERN", |
485 | 0 | &LLM_SECURITY_ANTIPATTERN_REGEX, |
486 | ) | |
487 | 0 | } |
488 | } | |
489 | ||
490 | /// Detector for LLM-generated database anti-patterns | |
491 | pub struct LLMDBAntipatternDetector; | |
492 | ||
493 | impl PatternDetector for LLMDBAntipatternDetector { | |
494 | 0 | fn detect(&self, content: &str, file_path: &Path) -> Vec<Match> { |
495 | 0 | detect_pattern_with_context( |
496 | 0 | content, |
497 | 0 | file_path, |
498 | 0 | "LLM_DB_ANTIPATTERN", |
499 | 0 | &LLM_DB_ANTIPATTERN_REGEX, |
500 | ) | |
501 | 0 | } |
502 | } | |
503 | ||
504 | /// Detector for LLM-generated error handling mistakes | |
505 | pub struct LLMErrorHandlingMistakesDetector; | |
506 | ||
507 | impl PatternDetector for LLMErrorHandlingMistakesDetector { | |
508 | 0 | fn detect(&self, content: &str, file_path: &Path) -> Vec<Match> { |
509 | 0 | detect_pattern_with_context( |
510 | 0 | content, |
511 | 0 | file_path, |
512 | 0 | "LLM_ERROR_HANDLING_MISTAKE", |
513 | 0 | &LLM_ERROR_HANDLING_MISTAKES_REGEX, |
514 | ) | |
515 | 0 | } |
516 | } | |
517 | ||
518 | /// Detector for LLM-generated performance mistakes | |
519 | pub struct LLMPerformanceMistakesDetector; | |
520 | ||
521 | impl PatternDetector for LLMPerformanceMistakesDetector { | |
522 | 0 | fn detect(&self, content: &str, file_path: &Path) -> Vec<Match> { |
523 | 0 | detect_pattern_with_context( |
524 | 0 | content, |
525 | 0 | file_path, |
526 | 0 | "LLM_PERFORMANCE_MISTAKE", |
527 | 0 | &LLM_PERFORMANCE_MISTAKES_REGEX, |
528 | ) | |
529 | 0 | } |
530 | } | |
531 | ||
532 | /// Detector for LLM-generated type handling mistakes | |
533 | pub struct LLMTypeMistakesDetector; | |
534 | ||
535 | impl PatternDetector for LLMTypeMistakesDetector { | |
536 | 0 | fn detect(&self, content: &str, file_path: &Path) -> Vec<Match> { |
537 | 0 | detect_pattern_with_context( |
538 | 0 | content, |
539 | 0 | file_path, |
540 | 0 | "LLM_TYPE_MISTAKE", |
541 | 0 | &LLM_TYPE_MISTAKES_REGEX, |
542 | ) | |
543 | 0 | } |
544 | } | |
545 | ||
546 | /// Comprehensive LLM vulnerability detector that combines multiple patterns | |
547 | pub struct ComprehensiveLLMDetector { | |
548 | detectors: Vec<Box<dyn PatternDetector>>, | |
549 | } | |
550 | ||
551 | impl ComprehensiveLLMDetector { | |
552 | 0 | pub fn new() -> Self { |
553 | 0 | let detectors: Vec<Box<dyn PatternDetector>> = vec![ |
554 | 0 | Box::new(HallucinatedApiDetector), |
555 | 0 | Box::new(LLMSQLInjectionDetector), |
556 | 0 | Box::new(InsecureRandomDetector), |
557 | 0 | Box::new(HardcodedCredentialsDetector), |
558 | 0 | Box::new(RustMemorySafetyDetector), |
559 | 0 | Box::new(AsyncAntipatternDetector), |
560 | 0 | Box::new(PerformanceAntipatternDetector), |
561 | 0 | Box::new(ErrorHandlingDetector), |
562 | 0 | Box::new(CryptoAntipatternDetector), |
563 | 0 | Box::new(OverengineeringDetector), |
564 | 0 | Box::new(XSSInjectionDetector), |
565 | 0 | Box::new(FilesystemSecurityDetector), |
566 | 0 | Box::new(ConfigAntipatternDetector), |
567 | 0 | Box::new(JSLLMIssuesDetector), |
568 | 0 | Box::new(PythonLLMIssuesDetector), |
569 | 0 | Box::new(ContextConfusionDetector), |
570 | 0 | Box::new(DatabaseAntipatternDetector), |
571 | 0 | Box::new(LLMGeneratedCommentsDetector), |
572 | 0 | Box::new(AIModelHallucinationDetector), |
573 | 0 | Box::new(IncorrectAsyncDetector), |
574 | 0 | Box::new(LLMSecurityAntipatternDetector), |
575 | 0 | Box::new(LLMDBAntipatternDetector), |
576 | 0 | Box::new(LLMErrorHandlingMistakesDetector), |
577 | 0 | Box::new(LLMPerformanceMistakesDetector), |
578 | 0 | Box::new(LLMTypeMistakesDetector), |
579 | ]; | |
580 | ||
581 | 0 | Self { detectors } |
582 | 0 | } |
583 | } | |
584 | ||
585 | impl Default for ComprehensiveLLMDetector { | |
586 | 0 | fn default() -> Self { |
587 | 0 | Self::new() |
588 | 0 | } |
589 | } | |
590 | ||
591 | impl PatternDetector for ComprehensiveLLMDetector { | |
592 | 0 | fn detect(&self, content: &str, file_path: &Path) -> Vec<Match> { |
593 | 0 | let mut all_matches = Vec::new(); |
594 | ||
595 | 0 | for detector in &self.detectors { |
596 | 0 | all_matches.extend(detector.detect(content, file_path)); |
597 | 0 | } |
598 | ||
599 | 0 | all_matches |
600 | 0 | } |
601 | } | |
602 | ||
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::PathBuf;

    // Three plausible-but-hallucinated API calls should each be flagged.
    #[test]
    fn test_hallucinated_api_detector() {
        let detector = HallucinatedApiDetector;
        let content = "user.authenticate(); data.validateInput(); crypto.secureRandom();";
        let path = PathBuf::from("test.js");
        let matches = detector.detect(content, &path);
        assert!(matches.len() >= 3);
        assert!(matches.iter().any(|m| m.pattern == "LLM_HALLUCINATED_API"));
    }

    // String-concatenated SQL should be flagged exactly once.
    #[test]
    fn test_sql_injection_detector() {
        let detector = LLMSQLInjectionDetector;
        let content = r#"query("SELECT * FROM users WHERE id = " + userId);"#;
        let path = PathBuf::from("test.js");
        let matches = detector.detect(content, &path);
        assert_eq!(matches.len(), 1);
        assert_eq!(matches[0].pattern, "LLM_SQL_INJECTION");
    }

    // Inline password and API-key literals should both be flagged.
    #[test]
    fn test_hardcoded_credentials_detector() {
        let detector = HardcodedCredentialsDetector;
        let content =
            r#"const password = "mySecretPass123"; const api_key = "sk-1234567890abcdef";"#;
        let path = PathBuf::from("test.js");
        let matches = detector.detect(content, &path);
        assert!(matches.len() >= 2);
        assert!(matches
            .iter()
            .any(|m| m.pattern == "LLM_HARDCODED_CREDENTIALS"));
    }

    // unwrap + unsafe transmute in a .rs file should be flagged.
    #[test]
    fn test_rust_memory_safety_detector() {
        let detector = RustMemorySafetyDetector;
        let content =
            "let value = ptr.unwrap(); // safe because we checked\nunsafe { transmute(data) }";
        let path = PathBuf::from("test.rs");
        let matches = detector.detect(content, &path);
        assert!(!matches.is_empty());
        assert_eq!(matches[0].pattern, "LLM_RUST_MEMORY_SAFETY");
    }

    // Misused await/then combinations should be flagged.
    #[test]
    fn test_async_antipattern_detector() {
        let detector = AsyncAntipatternDetector;
        let content =
            "await someVar; data.then(await processData); return await Promise.resolve(value);";
        let path = PathBuf::from("test.js");
        let matches = detector.detect(content, &path);
        assert!(!matches.is_empty());
        assert_eq!(matches[0].pattern, "LLM_ASYNC_ANTIPATTERN");
    }

    // Weak hash (MD5) and ECB-mode encryption should be flagged.
    #[test]
    fn test_crypto_antipattern_detector() {
        let detector = CryptoAntipatternDetector;
        let content = "const hash = MD5(password); const cipher = AES.ECB.encrypt(data);";
        let path = PathBuf::from("test.js");
        let matches = detector.detect(content, &path);
        assert!(!matches.is_empty());
        assert_eq!(matches[0].pattern, "LLM_CRYPTO_ANTIPATTERN");
    }

    // JS-specific sloppiness: loose equality, bare parseInt / JSON.parse.
    #[test]
    fn test_js_llm_issues_detector() {
        let detector = JSLLMIssuesDetector;
        let content = "if (value == null) { } parseInt(str); JSON.parse(data);";
        let path = PathBuf::from("test.js");
        let matches = detector.detect(content, &path);
        assert!(matches.len() >= 2);
        assert_eq!(matches[0].pattern, "LLM_JS_ISSUES");
    }

    // Python-specific dangers: exec/eval on input, pickle of request data.
    #[test]
    fn test_python_llm_issues_detector() {
        let detector = PythonLLMIssuesDetector;
        let content = "exec(input('Enter code: ')); eval(user_input); pickle.loads(request.data);";
        let path = PathBuf::from("test.py");
        let matches = detector.detect(content, &path);
        assert!(matches.len() >= 2);
        assert_eq!(matches[0].pattern, "LLM_PYTHON_ISSUES");
    }

    // The combined detector should surface issues from several categories at once.
    #[test]
    fn test_comprehensive_llm_detector() {
        let detector = ComprehensiveLLMDetector::new();
        let content = r#"
            user.authenticate(); // Common LLM hallucination
            const password = "hardcoded123";
            query("SELECT * FROM users WHERE id = " + id);
            if (value == null) { }
        "#;
        let path = PathBuf::from("test.js");
        let matches = detector.detect(content, &path);

        // Should detect multiple issues
        assert!(matches.len() >= 3);

        let patterns: Vec<&str> = matches.iter().map(|m| m.pattern.as_str()).collect();
        assert!(patterns.contains(&"LLM_HALLUCINATED_API"));
        assert!(patterns.contains(&"LLM_HARDCODED_CREDENTIALS"));
        assert!(patterns.contains(&"LLM_SQL_INJECTION"));
    }

    // Comments crediting ChatGPT/AI generation should all carry the same tag.
    #[test]
    fn test_llm_generated_comments_detector() {
        let detector = LLMGeneratedCommentsDetector;
        let content = "// This code was generated by ChatGPT\n// AI generated function\n// Based on GPT-4 suggestions";
        let path = PathBuf::from("test.js");
        let matches = detector.detect(content, &path);
        assert!(matches.len() >= 2);
        assert!(matches.iter().all(|m| m.pattern == "LLM_GENERATED_COMMENT"));
    }

    // SELECT * and query-in-loop shapes should be flagged.
    #[test]
    fn test_database_antipattern_detector() {
        let detector = DatabaseAntipatternDetector;
        let content = "SELECT * FROM users; for user in users: db.execute(query);";
        let path = PathBuf::from("test.sql");
        let matches = detector.detect(content, &path);
        assert!(!matches.is_empty());
        assert_eq!(matches[0].pattern, "LLM_DATABASE_ANTIPATTERN");
    }

    // Language-gated detectors must only fire on their own file extensions.
    #[test]
    fn test_file_extension_filtering() {
        let rust_detector = RustMemorySafetyDetector;
        let js_detector = JSLLMIssuesDetector;
        let python_detector = PythonLLMIssuesDetector;

        let content = "transmute(data); parseInt(str); exec(input());";

        let rust_path = PathBuf::from("test.rs");
        let js_path = PathBuf::from("test.js");
        let py_path = PathBuf::from("test.py");
        let txt_path = PathBuf::from("test.txt");

        // Rust detector should only work on .rs files
        assert!(!rust_detector.detect(content, &rust_path).is_empty());
        assert_eq!(rust_detector.detect(content, &js_path).len(), 0);

        // JS detector should only work on JS/TS files
        assert!(!js_detector.detect(content, &js_path).is_empty());
        assert_eq!(js_detector.detect(content, &txt_path).len(), 0);

        // Python detector should only work on .py files
        assert!(!python_detector.detect(content, &py_path).is_empty());
        assert_eq!(python_detector.detect(content, &js_path).len(), 0);
    }
}
Line | Count | Source |
1 | use anyhow::Result; | |
2 | use std::sync::Arc; | |
3 | use std::time::{Duration, Instant}; | |
4 | use sysinfo::System; | |
5 | use tokio::sync::Mutex; | |
6 | use tokio::time; | |
7 | use tracing::{error, info, warn}; | |
8 | ||
/// Performance monitor for tracking execution times and resource usage
#[derive(Debug)]
pub struct PerformanceMonitor {
    /// Shared sysinfo handle, behind an async mutex so a background
    /// monitoring task can refresh it concurrently.
    system: Arc<Mutex<System>>,
    /// When this monitor was created (basis for `total_elapsed`).
    start_time: Instant,
    /// Start time of the operation currently being monitored, if any.
    operation_start: Option<Instant>,
    /// Duration threshold after which operations are flagged/timed out.
    timeout_duration: Duration,
    /// Memory warning threshold in megabytes.
    memory_threshold_mb: usize,
    /// CPU warning threshold in percent.
    cpu_threshold_percent: f64,
}
19 | ||
20 | impl PerformanceMonitor { | |
21 | /// Create a new performance monitor with default thresholds | |
22 | 3 | pub fn new() -> Self { |
23 | 3 | Self::with_thresholds(Duration::from_secs(300), 1024, 90.0) // 5 min timeout, 1GB memory, 90% CPU |
24 | 3 | } |
25 | ||
26 | /// Create a new performance monitor with custom thresholds | |
27 | 3 | pub fn with_thresholds( |
28 | 3 | timeout: Duration, |
29 | 3 | memory_threshold_mb: usize, |
30 | 3 | cpu_threshold_percent: f64, |
31 | 3 | ) -> Self { |
32 | 3 | let mut system = System::new_all(); |
33 | 3 | system.refresh_all(); |
34 | ||
35 | 3 | Self { |
36 | 3 | system: Arc::new(Mutex::new(system)), |
37 | 3 | start_time: Instant::now(), |
38 | 3 | operation_start: None, |
39 | 3 | timeout_duration: timeout, |
40 | 3 | memory_threshold_mb, |
41 | 3 | cpu_threshold_percent, |
42 | 3 | } |
43 | 3 | } |
44 | ||
45 | /// Start monitoring an operation | |
46 | 2 | pub fn start_operation(&mut self, operation_name: &str) { |
47 | 2 | self.operation_start = Some(Instant::now()); |
48 | 2 | info!( |
49 | 2 | } |
50 | ||
51 | /// End monitoring an operation and log metrics | |
52 | 2 | pub async fn end_operation(&mut self, operation_name: &str) -> Result<(), anyhow::Error> { |
53 | 2 | let duration = match self.operation_start.take() { |
54 | 2 | Some(start) => start.elapsed(), |
55 | 0 | None => return Err(anyhow::anyhow!("No operation started")), |
56 | }; | |
57 | ||
58 | 2 | let metrics = self.collect_metrics().await |
59 | 2 | info!( |
60 | 0 | "Completed operation: {} in {:?} - CPU: {:.1}%, Memory: {:.1}MB", |
61 | operation_name, duration, metrics.cpu_usage, metrics.memory_usage_mb | |
62 | ); | |
63 | ||
64 | // Check thresholds | |
65 | 2 | if duration > self.timeout_duration { |
66 | 0 | warn!( |
67 | 0 | "Operation {} exceeded timeout threshold: {:?} > {:?}", |
68 | operation_name, duration, self.timeout_duration | |
69 | ); | |
70 | 2 | } |
71 | 2 | if metrics.memory_usage_mb > self.memory_threshold_mb as f64 { |
72 | 2 | warn!( |
73 | 0 | "Operation {} exceeded memory threshold: {:.1}MB > {}MB", |
74 | operation_name, metrics.memory_usage_mb, self.memory_threshold_mb | |
75 | ); | |
76 | 0 | } |
77 | 2 | if metrics.cpu_usage > self.cpu_threshold_percent { |
78 | 0 | warn!( |
79 | 0 | "Operation {} exceeded CPU threshold: {:.1}% > {:.1}%", |
80 | operation_name, metrics.cpu_usage, self.cpu_threshold_percent | |
81 | ); | |
82 | 2 | } |
83 | ||
84 | 2 | Ok(()) |
85 | 2 | } |
86 | ||
87 | /// Collect current system metrics | |
88 | 2 | pub async fn collect_metrics(&self) -> Result<SystemMetrics, anyhow::Error> { |
89 | 2 | let mut system = self.system.lock().await; |
90 | 2 | system.refresh_all(); |
91 | ||
92 | 2 | let cpu_usage = system.global_cpu_info().cpu_usage() as f64; |
93 | 2 | let memory_usage_mb = system.used_memory() as f64 / 1024.0 / 1024.0; |
94 | ||
95 | 2 | Ok(SystemMetrics { |
96 | 2 | cpu_usage, |
97 | 2 | memory_usage_mb, |
98 | 2 | total_memory_mb: system.total_memory() as f64 / 1024.0 / 1024.0, |
99 | 2 | }) |
100 | 2 | } |
101 | ||
102 | /// Start async monitoring task that logs metrics periodically | |
103 | 0 | pub async fn start_async_monitoring(&self, interval: Duration) { |
104 | 0 | let system = Arc::clone(&self.system); |
105 | 0 | let timeout_duration = self.timeout_duration; |
106 | ||
107 | 0 | tokio::spawn(async move { |
108 | 0 | let mut interval = time::interval(interval); |
109 | 0 | let start_time = Instant::now(); |
110 | ||
111 | loop { | |
112 | 0 | interval.tick().await; |
113 | ||
114 | 0 | let elapsed = start_time.elapsed(); |
115 | 0 | if elapsed > timeout_duration { |
116 | 0 | error!( |
117 | 0 | "Monitoring timeout exceeded: {:?} > {:?}", |
118 | elapsed, timeout_duration | |
119 | ); | |
120 | 0 | break; |
121 | 0 | } |
122 | ||
123 | 0 | let mut sys = system.lock().await; |
124 | 0 | sys.refresh_all(); |
125 | ||
126 | 0 | let cpu = sys.global_cpu_info().cpu_usage(); |
127 | 0 | let mem_mb = sys.used_memory() as f64 / 1024.0 / 1024.0; |
128 | ||
129 | 0 | info!( |
130 | 0 | "Monitoring - Elapsed: {:?}, CPU: {:.1}%, Memory: {:.1}MB", |
131 | elapsed, cpu, mem_mb | |
132 | ); | |
133 | } | |
134 | 0 | }); |
135 | 0 | } |
136 | ||
137 | /// Get total elapsed time since monitor creation | |
138 | 0 | pub fn total_elapsed(&self) -> Duration { |
139 | 0 | self.start_time.elapsed() |
140 | 0 | } |
141 | } | |
142 | ||
impl Default for PerformanceMonitor {
    /// Equivalent to [`PerformanceMonitor::new`] (default thresholds).
    fn default() -> Self {
        Self::new()
    }
}
148 | ||
/// System metrics snapshot
#[derive(Debug, Clone)]
pub struct SystemMetrics {
    /// Global CPU usage in percent.
    pub cpu_usage: f64,
    /// Used memory in megabytes.
    pub memory_usage_mb: f64,
    /// Total memory in megabytes.
    pub total_memory_mb: f64,
}
156 | ||
/// Async operation wrapper with monitoring
pub struct MonitoredOperation<T> {
    /// Dedicated monitor used for timing and resource checks.
    monitor: PerformanceMonitor,
    /// Name used in log lines and error messages.
    operation_name: String,
    /// Ties `T` (the operation's success type) to the wrapper without storing one.
    _phantom: std::marker::PhantomData<T>,
}
163 | ||
impl<T> MonitoredOperation<T> {
    /// Creates a monitored wrapper for an operation producing `T`.
    pub fn new(operation_name: &str) -> Self {
        Self {
            monitor: PerformanceMonitor::new(),
            operation_name: operation_name.to_string(),
            _phantom: std::marker::PhantomData,
        }
    }

    /// Runs `operation` with periodic background monitoring and the
    /// monitor's timeout applied.
    ///
    /// # Errors
    /// Fails if the operation times out, if the operation itself returns an
    /// error string, or if end-of-operation bookkeeping fails.
    pub async fn execute<F, Fut>(&mut self, operation: F) -> Result<T, anyhow::Error>
    where
        F: FnOnce() -> Fut,
        Fut: std::future::Future<Output = Result<T, String>>,
    {
        self.monitor.start_operation(&self.operation_name);

        // Start monitoring
        // NOTE(review): this spawns a detached task that keeps logging until
        // the monitor's timeout elapses, even after the operation completes.
        self.monitor
            .start_async_monitoring(Duration::from_secs(10))
            .await;

        // Execute with timeout
        // Outer `?` handles timeout; inner `?` converts the operation's
        // String error into anyhow.
        let result = time::timeout(self.monitor.timeout_duration, operation())
            .await
            .map_err(|_| {
                anyhow::anyhow!(
                    "Operation {} timed out after {:?}",
                    self.operation_name,
                    self.monitor.timeout_duration
                )
            })?
            .map_err(|e| anyhow::anyhow!("Operation {} failed: {}", self.operation_name, e))?;

        // NOTE(review): on timeout/failure the early return skips
        // end_operation, leaving the monitor's operation_start set.
        self.monitor.end_operation(&self.operation_name).await?;

        Ok(result)
    }
}
202 | ||
#[cfg(test)]
mod tests {
    use super::*;
    use tokio::time::sleep;

    // Start/end an operation around a short sleep and verify the collected
    // metrics are non-negative.
    #[tokio::test]
    async fn test_performance_monitor() {
        let mut monitor = PerformanceMonitor::new();
        monitor.start_operation("test_op");

        sleep(Duration::from_millis(100)).await;

        let result = monitor.end_operation("test_op").await;
        assert!(result.is_ok());

        let metrics = monitor.collect_metrics().await.unwrap();
        assert!(metrics.cpu_usage >= 0.0);
        assert!(metrics.memory_usage_mb >= 0.0);
    }

    // Wrap an async closure in MonitoredOperation and check the value
    // passes through unchanged.
    #[tokio::test]
    async fn test_monitored_operation() {
        let mut monitored = MonitoredOperation::<String>::new("test_async_op");

        let result = monitored
            .execute(|| async {
                sleep(Duration::from_millis(50)).await;
                Ok("success".to_string())
            })
            .await;

        assert!(result.is_ok());
        assert_eq!(result.unwrap(), "success");
    }
}
Line | Count | Source |
1 | use crate::{Match, PatternDetector}; | |
2 | use anyhow::Result; | |
3 | use dashmap::DashMap; | |
4 | use ignore::WalkBuilder; | |
5 | use memmap2::Mmap; | |
6 | use rayon::prelude::*; | |
7 | use std::fs::File; | |
8 | use std::path::Path; | |
9 | use std::sync::atomic::{AtomicUsize, Ordering}; | |
10 | use std::time::Instant; | |
11 | ||
/// Performance metrics for scanning operations
#[derive(Debug, Clone)]
pub struct ScanMetrics {
    /// Files that passed filtering and were actually scanned.
    pub total_files_scanned: usize,
    /// Total source lines read across scanned files.
    pub total_lines_processed: usize,
    /// Total pattern matches found.
    pub total_matches_found: usize,
    /// Wall-clock scan duration in milliseconds.
    pub scan_duration_ms: u64,
    /// Files whose results were served from the modification-time cache.
    pub cache_hits: usize,
    /// Files that had to be (re)scanned.
    pub cache_misses: usize,
}
22 | ||
/// Optimized scanner with performance enhancements
pub struct OptimizedScanner {
    /// Detectors run against every scanned file.
    detectors: Vec<Box<dyn PatternDetector>>,
    /// General-purpose match cache.
    // NOTE(review): only cleared/inspected in this file — presumably used by
    // code outside this chunk; verify before removing.
    cache: DashMap<String, Vec<Match>>,
    file_cache: DashMap<String, (u64, Vec<Match>)>, // (modified_time, matches)
    /// Max entries kept in `file_cache` before partial eviction.
    max_cache_size: usize,
}
30 | ||
31 | impl OptimizedScanner { | |
32 | /// Creates a new optimized scanner with the given pattern detectors | |
33 | 4 | pub fn new(detectors: Vec<Box<dyn PatternDetector>>) -> Self { |
34 | 4 | Self { |
35 | 4 | detectors, |
36 | 4 | cache: DashMap::new(), |
37 | 4 | file_cache: DashMap::new(), |
38 | 4 | max_cache_size: 1000, // Maximum number of cached file results |
39 | 4 | } |
40 | 4 | } |
41 | ||
42 | /// Set maximum cache size | |
43 | 3 | pub fn with_cache_size(mut self, size: usize) -> Self { |
44 | 3 | self.max_cache_size = size; |
45 | 3 | self |
46 | 3 | } |
47 | ||
48 | /// Optimized scan with performance improvements | |
49 | 4 | pub fn scan_optimized(&self, root: &Path) -> Result<(Vec<Match>, ScanMetrics)> { |
50 | 4 | let start_time = Instant::now(); |
51 | 4 | let files_processed = AtomicUsize::new(0); |
52 | 4 | let lines_processed = AtomicUsize::new(0); |
53 | 4 | let cache_hits = AtomicUsize::new(0); |
54 | 4 | let cache_misses = AtomicUsize::new(0); |
55 | ||
56 | // Pre-compile regex patterns and optimize file filtering | |
57 | 4 | let matches: Vec<Match> = WalkBuilder::new(root) |
58 | 4 | .standard_filters(true) // Use gitignore, etc. |
59 | 4 | .build() |
60 | 4 | .par_bridge() |
61 | 9 | . |
62 | 9 | let |
63 | 8 | let file_type = entry.file_type() |
64 | ||
65 | 8 | if !file_type.is_file() { |
66 | 3 | return None; |
67 | 5 | } |
68 | ||
69 | 5 | let path = entry.path(); |
70 | ||
71 | // Skip binary files and large files early | |
72 | 5 | if !self.should_scan_file(path) { |
73 | 0 | return None; |
74 | 5 | } |
75 | ||
76 | 5 | files_processed.fetch_add(1, Ordering::Relaxed); |
77 | ||
78 | 5 | let path_str = path.to_string_lossy().to_string(); |
79 | ||
80 | // Check file-based cache with modification time | |
81 | 5 | if let Some( |
82 | 0 | cache_hits.fetch_add(1, Ordering::Relaxed); |
83 | 0 | return Some(cached_result); |
84 | 5 | } |
85 | ||
86 | 5 | cache_misses.fetch_add(1, Ordering::Relaxed); |
87 | ||
88 | // Read and process file | |
89 | 5 | let |
90 | 3 | lines_processed.fetch_add(content.lines().count(), Ordering::Relaxed); |
91 | ||
92 | // Use optimized parallel processing for detectors | |
93 | 3 | let file_matches: Vec<Match> = if self.detectors.len() > 3 { |
94 | // For many detectors, use parallel processing | |
95 | 0 | self.detectors |
96 | 0 | .par_iter() |
97 | 0 | .flat_map(|detector| detector.detect(&content, path)) |
98 | 0 | .collect() |
99 | } else { | |
100 | // For few detectors, sequential is faster (less overhead) | |
101 | 3 | self.detectors |
102 | 3 | .iter() |
103 | 6 | . |
104 | 3 | .collect() |
105 | }; | |
106 | ||
107 | // Cache the result with file modification time | |
108 | 3 | self.cache_result(path, &path_str, &file_matches); |
109 | ||
110 | 3 | Some(file_matches) |
111 | 9 | }) |
112 | 4 | .flatten() |
113 | 4 | .collect(); |
114 | ||
115 | 4 | let duration = start_time.elapsed(); |
116 | ||
117 | 4 | let metrics = ScanMetrics { |
118 | 4 | total_files_scanned: files_processed.load(Ordering::Relaxed), |
119 | 4 | total_lines_processed: lines_processed.load(Ordering::Relaxed), |
120 | 4 | total_matches_found: matches.len(), |
121 | 4 | scan_duration_ms: duration.as_millis() as u64, |
122 | 4 | cache_hits: cache_hits.load(Ordering::Relaxed), |
123 | 4 | cache_misses: cache_misses.load(Ordering::Relaxed), |
124 | 4 | }; |
125 | ||
126 | 4 | Ok((matches, metrics)) |
127 | 4 | } |
128 | ||
129 | /// Check if a file should be scanned based on size and type | |
130 | 5 | fn should_scan_file(&self, path: &Path) -> bool { |
131 | // Check file extension | |
132 | 5 | if let Some(ext) = path.extension().and_then(|s| s.to_str()) { |
133 | 5 | match ext.to_lowercase().as_str() { |
134 | // Skip binary files | |
135 | 5 | "exe" | "dll" | "so" | "dylib" | "bin" | "obj" | "o" | "a" | "lib" => return |
136 | // Skip image files | |
137 | 5 | "png" | "jpg" | "jpeg" | "gif" | "svg" | "ico" | "bmp" | "tiff" => return |
138 | // Skip compressed files | |
139 | 5 | "zip" | "tar" | "gz" | "rar" | "7z" | "bz2" | "xz" => return |
140 | // Skip media files | |
141 | 5 | "mp3" | "mp4" | "avi" | "mov" | "wav" | "flac" => return |
142 | 5 | _ => {} |
143 | } | |
144 | 0 | } |
145 | ||
146 | // Check file size (skip files larger than 5MB) | |
147 | 5 | if let Ok(metadata) = std::fs::metadata(path) { |
148 | 5 | if metadata.len() > 5 * 1024 * 1024 { |
149 | 0 | return false; |
150 | 5 | } |
151 | 0 | } |
152 | ||
153 | 5 | true |
154 | 5 | } |
155 | ||
156 | /// Get cached result if file hasn't been modified | |
157 | 5 | fn get_cached_result(&self, path: &Path, path_str: &str) -> Option<Vec<Match>> { |
158 | 5 | if let Ok(metadata) = std::fs::metadata(path) { |
159 | 5 | if let Ok(modified) = metadata.modified() { |
160 | 5 | if let Some( |
161 | 0 | let (cached_time, cached_matches) = cached_entry.value(); |
162 | 0 | let modified_timestamp = modified |
163 | 0 | .duration_since(std::time::UNIX_EPOCH) |
164 | 0 | .ok()? |
165 | 0 | .as_secs(); |
166 | ||
167 | 0 | if modified_timestamp == *cached_time { |
168 | 0 | return Some(cached_matches.clone()); |
169 | 0 | } |
170 | 5 | } |
171 | 0 | } |
172 | 0 | } |
173 | 5 | None |
174 | 5 | } |
175 | ||
176 | /// Cache result with file modification time | |
177 | 3 | fn cache_result(&self, path: &Path, path_str: &str, matches: &[Match]) { |
178 | // Manage cache size | |
179 | 3 | if self.file_cache.len() >= self.max_cache_size { |
180 | // Remove some old entries (simple LRU-like behavior) | |
181 | 0 | let keys_to_remove: Vec<String> = self |
182 | 0 | .file_cache |
183 | 0 | .iter() |
184 | 0 | .take(self.max_cache_size / 4) |
185 | 0 | .map(|entry| entry.key().clone()) |
186 | 0 | .collect(); |
187 | ||
188 | 0 | for key in keys_to_remove { |
189 | 0 | self.file_cache.remove(&key); |
190 | 0 | } |
191 | 3 | } |
192 | ||
193 | 3 | if let Ok(metadata) = std::fs::metadata(path) { |
194 | 3 | if let Ok(modified) = metadata.modified() { |
195 | 3 | let modified_timestamp = modified |
196 | 3 | .duration_since(std::time::UNIX_EPOCH) |
197 | 3 | .map(|d| d.as_secs()) |
198 | 3 | .unwrap_or(0); |
199 | ||
200 | 3 | self.file_cache |
201 | 3 | .insert(path_str.to_string(), (modified_timestamp, matches.to_vec())); |
202 | 0 | } |
203 | 0 | } |
204 | 3 | } |
205 | ||
206 | /// Clear all caches | |
207 | 0 | pub fn clear_cache(&self) { |
208 | 0 | self.cache.clear(); |
209 | 0 | self.file_cache.clear(); |
210 | 0 | } |
211 | ||
212 | /// Get cache statistics | |
213 | 0 | pub fn cache_stats(&self) -> (usize, usize) { |
214 | 0 | (self.cache.len(), self.file_cache.len()) |
215 | 0 | } |
216 | } | |
217 | ||
/// Memory-efficient streaming scanner for very large codebases
pub struct StreamingScanner {
    /// Detectors run against each file in a batch.
    detectors: Vec<Box<dyn PatternDetector>>,
    /// Number of files accumulated before a batch is processed.
    batch_size: usize,
}
223 | ||
224 | impl StreamingScanner { | |
225 | 3 | pub fn new(detectors: Vec<Box<dyn PatternDetector>>) -> Self { |
226 | 3 | Self { |
227 | 3 | detectors, |
228 | 3 | batch_size: 100, // Process files in batches |
229 | 3 | } |
230 | 3 | } |
231 | ||
232 | /// Scan with memory-efficient streaming | |
233 | 3 | pub fn scan_streaming<F>(&self, root: &Path, mut callback: F) -> Result<ScanMetrics> |
234 | 3 | where |
235 | 3 | F: FnMut(Vec<Match>) -> Result<()>, |
236 | { | |
237 | 3 | let start_time = Instant::now(); |
238 | 3 | let mut total_files = 0; |
239 | 3 | let mut total_lines = 0; |
240 | 3 | let mut total_matches = 0; |
241 | ||
242 | 3 | let walker = WalkBuilder::new(root).standard_filters(true).build(); |
243 | ||
244 | 3 | let mut file_batch = Vec::new(); |
245 | ||
246 | 24 | for |
247 | 22 | let |
248 | 21 | if entry.file_type().is_some_and(|ft| ft.is_file()) { |
249 | 18 | file_batch.push(entry.path().to_path_buf()); |
250 | ||
251 | 18 | if file_batch.len() >= self.batch_size { |
252 | 0 | let (batch_matches, batch_lines) = self.process_batch(&file_batch)?; |
253 | 0 | total_files += file_batch.len(); |
254 | 0 | total_lines += batch_lines; |
255 | 0 | total_matches += batch_matches.len(); |
256 | ||
257 | 0 | callback(batch_matches)?; |
258 | 0 | file_batch.clear(); |
259 | 18 | } |
260 | 3 | } |
261 | } | |
262 | ||
263 | // Process remaining files | |
264 | 2 | if !file_batch.is_empty() { |
265 | 2 | let (batch_matches, batch_lines) = self.process_batch(&file_batch) |
266 | 2 | total_files += file_batch.len(); |
267 | 2 | total_lines += batch_lines; |
268 | 2 | total_matches += batch_matches.len(); |
269 | ||
270 | 2 | callback(batch_matches) |
271 | 0 | } |
272 | ||
273 | 2 | let duration = start_time.elapsed(); |
274 | ||
275 | 2 | Ok(ScanMetrics { |
276 | 2 | total_files_scanned: total_files, |
277 | 2 | total_lines_processed: total_lines, |
278 | 2 | total_matches_found: total_matches, |
279 | 2 | scan_duration_ms: duration.as_millis() as u64, |
280 | 2 | cache_hits: 0, |
281 | 2 | cache_misses: 0, |
282 | 2 | }) |
283 | 3 | } |
284 | ||
285 | 2 | fn process_batch(&self, files: &[std::path::PathBuf]) -> Result<(Vec<Match>, usize)> { |
286 | 2 | let results: Vec<(Vec<Match>, usize)> = files |
287 | 2 | .par_iter() |
288 | 18 | . |
289 | // Use memory-mapped files for large files (>1MB) for better performance | |
290 | 18 | let ( |
291 | 18 | if metadata.len() > 1024 * 1024 { |
292 | // Use memory mapping for large files | |
293 | 0 | if let Ok(file) = File::open(path) { |
294 | 0 | if let Ok(mmap) = unsafe { Mmap::map(&file) } { |
295 | 0 | let content = std::str::from_utf8(&mmap).ok()?; |
296 | 0 | let line_count = content.lines().count(); |
297 | 0 | (content.to_string(), line_count) |
298 | } else { | |
299 | // Fallback to regular reading | |
300 | 0 | let content = std::fs::read_to_string(path).ok()?; |
301 | 0 | let line_count = content.lines().count(); |
302 | 0 | (content, line_count) |
303 | } | |
304 | } else { | |
305 | 0 | return None; |
306 | } | |
307 | } else { | |
308 | // Use regular reading for smaller files | |
309 | 18 | let |
310 | 16 | let line_count = content.lines().count(); |
311 | 16 | (content, line_count) |
312 | } | |
313 | } else { | |
314 | 0 | return None; |
315 | }; | |
316 | ||
317 | 16 | let matches: Vec<Match> = self |
318 | 16 | .detectors |
319 | 16 | .iter() |
320 | 408 | . |
321 | 16 | .collect(); |
322 | ||
323 | 16 | Some((matches, line_count)) |
324 | 18 | }) |
325 | 2 | .collect(); |
326 | ||
327 | 16 | let |
328 | 2 | let total_lines: usize = results.iter().map(|(_, l)| *l).sum(); |
329 | ||
330 | 2 | Ok((all_matches, total_lines)) |
331 | 2 | } |
332 | } | |
333 | ||
#[cfg(test)]
mod tests {
    use super::*;
    use crate::detectors::*;
    use tempfile::TempDir;

    #[test]
    fn test_optimized_scanner() {
        let dir = TempDir::new().unwrap();
        let source_file = dir.path().join("test.rs");
        std::fs::write(&source_file, "// TODO: test\n// FIXME: another").unwrap();

        let detectors: Vec<Box<dyn PatternDetector>> =
            vec![Box::new(TodoDetector), Box::new(FixmeDetector)];
        let scanner = OptimizedScanner::new(detectors);

        let (matches, metrics) = scanner.scan_optimized(dir.path()).unwrap();

        // One TODO and one FIXME in a single file.
        assert_eq!(matches.len(), 2);
        assert_eq!(metrics.total_files_scanned, 1);
        assert!(metrics.scan_duration_ms > 0);
    }

    #[test]
    fn test_caching() {
        let dir = TempDir::new().unwrap();
        std::fs::write(dir.path().join("test.rs"), "// TODO: test").unwrap();

        let detectors: Vec<Box<dyn PatternDetector>> = vec![Box::new(TodoDetector)];
        let scanner = OptimizedScanner::new(detectors);

        // First scan populates the cache.
        let (first_matches, _first_metrics) = scanner.scan_optimized(dir.path()).unwrap();

        // Second scan over unchanged files should be served from cache.
        let (second_matches, second_metrics) = scanner.scan_optimized(dir.path()).unwrap();

        assert_eq!(first_matches.len(), second_matches.len());
        assert!(second_metrics.cache_hits > 0);
    }
}
376 | ||
377 | /// Advanced scanner combining multiple optimization techniques | |
378 | pub struct AdvancedScanner { | |
379 | detectors: Vec<Box<dyn PatternDetector>>, | |
380 | high_perf_detector: crate::detectors::HighPerformanceDetector, | |
381 | cache: DashMap<String, (u64, Vec<Match>)>, | |
382 | max_cache_size: usize, | |
383 | use_memory_mapping: bool, | |
384 | } | |
385 | ||
386 | impl AdvancedScanner { | |
387 | /// Creates a new advanced scanner with optimized detectors | |
388 | 0 | pub fn new(detectors: Vec<Box<dyn PatternDetector>>) -> Self { |
389 | 0 | let high_perf_detector = crate::detectors::HighPerformanceDetector::for_common_patterns(); |
390 | ||
391 | 0 | Self { |
392 | 0 | detectors, |
393 | 0 | high_perf_detector, |
394 | 0 | cache: DashMap::new(), |
395 | 0 | max_cache_size: 20000, |
396 | 0 | use_memory_mapping: true, |
397 | 0 | } |
398 | 0 | } |
399 | ||
400 | /// Advanced scan with multiple optimization layers | |
401 | 0 | pub fn scan_advanced(&self, root: &Path) -> Result<(Vec<Match>, ScanMetrics)> { |
402 | 0 | let start_time = Instant::now(); |
403 | 0 | let files_processed = AtomicUsize::new(0); |
404 | 0 | let lines_processed = AtomicUsize::new(0); |
405 | 0 | let cache_hits = AtomicUsize::new(0); |
406 | 0 | let cache_misses = AtomicUsize::new(0); |
407 | ||
408 | 0 | let matches: Vec<Match> = WalkBuilder::new(root) |
409 | 0 | .standard_filters(true) |
410 | 0 | .build() |
411 | 0 | .par_bridge() |
412 | 0 | .filter_map(|entry| { |
413 | 0 | let entry = entry.ok()?; |
414 | 0 | let file_type = entry.file_type()?; |
415 | ||
416 | 0 | if !file_type.is_file() { |
417 | 0 | return None; |
418 | 0 | } |
419 | ||
420 | 0 | let path = entry.path(); |
421 | ||
422 | // Skip inappropriate files early | |
423 | 0 | if !self.should_scan_file_advanced(path) { |
424 | 0 | return None; |
425 | 0 | } |
426 | ||
427 | 0 | files_processed.fetch_add(1, Ordering::Relaxed); |
428 | 0 | let path_str = path.to_string_lossy().to_string(); |
429 | ||
430 | // Check cache | |
431 | 0 | if let Some(cached_result) = self.get_cached_result_advanced(path, &path_str) { |
432 | 0 | cache_hits.fetch_add(1, Ordering::Relaxed); |
433 | 0 | return Some(cached_result); |
434 | 0 | } |
435 | ||
436 | 0 | cache_misses.fetch_add(1, Ordering::Relaxed); |
437 | ||
438 | // Read content with optimizations | |
439 | 0 | let content = self.read_file_content_advanced(path).ok()?; |
440 | 0 | lines_processed.fetch_add(content.lines().count(), Ordering::Relaxed); |
441 | ||
442 | // Use high-performance detector for common patterns | |
443 | 0 | let mut file_matches = self.high_perf_detector.detect(&content, path); |
444 | ||
445 | // Use specialized detectors for remaining patterns | |
446 | 0 | for detector in &self.detectors { |
447 | 0 | file_matches.extend(detector.detect(&content, path)); |
448 | 0 | } |
449 | ||
450 | // Remove duplicates (patterns might overlap) | |
451 | 0 | file_matches.sort_by(|a, b| (a.line_number, a.column, a.pattern.clone()).cmp(&(b.line_number, b.column, b.pattern.clone()))); |
452 | 0 | file_matches.dedup_by(|a, b| a.line_number == b.line_number && a.column == b.column && a.pattern == b.pattern); |
453 | ||
454 | // Cache result | |
455 | 0 | self.cache_result_advanced(path, &path_str, &file_matches); |
456 | ||
457 | 0 | Some(file_matches) |
458 | 0 | }) |
459 | 0 | .flatten() |
460 | 0 | .collect(); |
461 | ||
462 | 0 | let duration = start_time.elapsed(); |
463 | ||
464 | 0 | let metrics = ScanMetrics { |
465 | 0 | total_files_scanned: files_processed.load(Ordering::Relaxed), |
466 | 0 | total_lines_processed: lines_processed.load(Ordering::Relaxed), |
467 | 0 | total_matches_found: matches.len(), |
468 | 0 | scan_duration_ms: duration.as_millis() as u64, |
469 | 0 | cache_hits: cache_hits.load(Ordering::Relaxed), |
470 | 0 | cache_misses: cache_misses.load(Ordering::Relaxed), |
471 | 0 | }; |
472 | ||
473 | 0 | Ok((matches, metrics)) |
474 | 0 | } |
475 | ||
476 | /// Advanced file filtering with better heuristics | |
477 | 0 | fn should_scan_file_advanced(&self, path: &Path) -> bool { |
478 | // Basic checks | |
479 | 0 | if let Some(ext) = path.extension().and_then(|s| s.to_str()) { |
480 | 0 | match ext.to_lowercase().as_str() { |
481 | // Skip binary files | |
482 | 0 | "exe" | "dll" | "so" | "dylib" | "bin" | "obj" | "o" | "a" | "lib" | |
483 | 0 | "png" | "jpg" | "jpeg" | "gif" | "svg" | "ico" | "bmp" | "tiff" | |
484 | 0 | "zip" | "tar" | "gz" | "rar" | "7z" | "bz2" | "xz" | |
485 | 0 | "mp3" | "mp4" | "avi" | "mov" | "wav" | "flac" | |
486 | 0 | "pdf" | "doc" | "docx" | "xls" | "xlsx" | "ppt" | "pptx" => return false, |
487 | 0 | _ => {} |
488 | } | |
489 | 0 | } |
490 | ||
491 | // Size check with larger limit for advanced scanner | |
492 | 0 | if let Ok(metadata) = std::fs::metadata(path) { |
493 | 0 | if metadata.len() > 10 * 1024 * 1024 { // 10MB limit |
494 | 0 | return false; |
495 | 0 | } |
496 | 0 | } |
497 | ||
498 | // Skip files in common build/dependency directories | |
499 | 0 | if let Some(path_str) = path.to_str() { |
500 | 0 | if path_str.contains("/target/") || |
501 | 0 | path_str.contains("/node_modules/") || |
502 | 0 | path_str.contains("/.git/") || |
503 | 0 | path_str.contains("/build/") || |
504 | 0 | path_str.contains("/dist/") || |
505 | 0 | path_str.contains("/.next/") || |
506 | 0 | path_str.contains("/.nuxt/") { |
507 | 0 | return false; |
508 | 0 | } |
509 | 0 | } |
510 | ||
511 | 0 | true |
512 | 0 | } |
513 | ||
514 | /// Advanced file reading with memory mapping for large files | |
515 | 0 | fn read_file_content_advanced(&self, path: &Path) -> Result<String> { |
516 | 0 | if !self.use_memory_mapping { |
517 | 0 | return Ok(std::fs::read_to_string(path)?); |
518 | 0 | } |
519 | ||
520 | 0 | let metadata = std::fs::metadata(path)?; |
521 | ||
522 | 0 | if metadata.len() > 1024 * 1024 { // 1MB threshold |
523 | // Use memory mapping for large files | |
524 | 0 | let file = File::open(path)?; |
525 | 0 | let mmap = unsafe { Mmap::map(&file)? }; |
526 | 0 | let content = std::str::from_utf8(&mmap)?; |
527 | 0 | Ok(content.to_string()) |
528 | } else { | |
529 | // Regular reading for smaller files | |
530 | 0 | Ok(std::fs::read_to_string(path)?) |
531 | } | |
532 | 0 | } |
533 | ||
534 | /// Advanced caching with better invalidation | |
535 | 0 | fn get_cached_result_advanced(&self, path: &Path, path_str: &str) -> Option<Vec<Match>> { |
536 | 0 | if let Ok(metadata) = std::fs::metadata(path) { |
537 | 0 | if let Ok(modified) = metadata.modified() { |
538 | 0 | if let Some(cached_entry) = self.cache.get(path_str) { |
539 | 0 | let (cached_time, cached_matches) = cached_entry.value(); |
540 | 0 | let modified_timestamp = modified |
541 | 0 | .duration_since(std::time::UNIX_EPOCH) |
542 | 0 | .ok()? |
543 | 0 | .as_secs(); |
544 | ||
545 | 0 | if modified_timestamp == *cached_time { |
546 | 0 | return Some(cached_matches.clone()); |
547 | 0 | } |
548 | 0 | } |
549 | 0 | } |
550 | 0 | } |
551 | 0 | None |
552 | 0 | } |
553 | ||
554 | /// Cache result with LRU-style eviction | |
555 | 0 | fn cache_result_advanced(&self, path: &Path, path_str: &str, matches: &[Match]) { |
556 | // Manage cache size | |
557 | 0 | if self.cache.len() >= self.max_cache_size { |
558 | 0 | let keys_to_remove: Vec<String> = self |
559 | 0 | .cache |
560 | 0 | .iter() |
561 | 0 | .take(self.max_cache_size / 4) |
562 | 0 | .map(|entry| entry.key().clone()) |
563 | 0 | .collect(); |
564 | ||
565 | 0 | for key in keys_to_remove { |
566 | 0 | self.cache.remove(&key); |
567 | 0 | } |
568 | 0 | } |
569 | ||
570 | 0 | if let Ok(metadata) = std::fs::metadata(path) { |
571 | 0 | if let Ok(modified) = metadata.modified() { |
572 | 0 | let modified_timestamp = modified |
573 | 0 | .duration_since(std::time::UNIX_EPOCH) |
574 | 0 | .map(|d| d.as_secs()) |
575 | 0 | .unwrap_or(0); |
576 | ||
577 | 0 | self.cache |
578 | 0 | .insert(path_str.to_string(), (modified_timestamp, matches.to_vec())); |
579 | 0 | } |
580 | 0 | } |
581 | 0 | } |
582 | ||
583 | /// Configure memory mapping usage | |
584 | 0 | pub fn with_memory_mapping(mut self, enabled: bool) -> Self { |
585 | 0 | self.use_memory_mapping = enabled; |
586 | 0 | self |
587 | 0 | } |
588 | ||
589 | /// Set cache size | |
590 | 0 | pub fn with_cache_size(mut self, size: usize) -> Self { |
591 | 0 | self.max_cache_size = size; |
592 | 0 | self |
593 | 0 | } |
594 | } |
Line | Count | Source |
1 | use std::collections::HashMap; | |
2 | use std::time::{Duration, Instant}; | |
3 | ||
/// Performance profiler for tracking operation timings
#[derive(Debug, Clone)]
pub struct PerformanceProfiler {
    // Completed durations, keyed by operation name.
    timings: HashMap<String, Vec<Duration>>,
    // Operations currently being timed: name -> start instant.
    start_times: HashMap<String, Instant>,
}

impl PerformanceProfiler {
    pub fn new() -> Self {
        Self {
            timings: HashMap::new(),
            start_times: HashMap::new(),
        }
    }

    /// Start timing an operation
    ///
    /// Calling `start` again before `end` restarts the timer (the previous
    /// start instant is overwritten).
    pub fn start(&mut self, operation: &str) {
        self.start_times
            .insert(operation.to_string(), Instant::now());
    }

    /// End timing an operation
    ///
    /// No-op if `start` was never called for this operation.
    pub fn end(&mut self, operation: &str) {
        if let Some(start_time) = self.start_times.remove(operation) {
            let duration = start_time.elapsed();
            self.timings
                .entry(operation.to_string())
                .or_default()
                .push(duration);
        }
    }

    /// Get average duration for an operation, or `None` if never recorded.
    pub fn average_duration(&self, operation: &str) -> Option<Duration> {
        let durations = self.timings.get(operation)?;
        if durations.is_empty() {
            return None;
        }

        let total: Duration = durations.iter().sum();
        Some(total / durations.len() as u32)
    }

    /// Get total duration for an operation, or `None` if never recorded.
    pub fn total_duration(&self, operation: &str) -> Option<Duration> {
        let durations = self.timings.get(operation)?;
        Some(durations.iter().sum())
    }

    /// Get operation count
    pub fn operation_count(&self, operation: &str) -> usize {
        self.timings.get(operation).map_or(0, |d| d.len())
    }

    /// Generate performance report
    ///
    /// Operations are listed in sorted name order so the report is
    /// deterministic (HashMap iteration order is unspecified).
    pub fn report(&self) -> String {
        let mut report = String::from("Performance Report:\n");
        report.push_str("==================\n\n");

        let mut operations: Vec<&String> = self.timings.keys().collect();
        operations.sort();

        for operation in operations {
            let durations = &self.timings[operation];
            if durations.is_empty() {
                continue;
            }

            let total: Duration = durations.iter().sum();
            let average = total / durations.len() as u32;
            // Non-empty checked above, so min/max cannot fail.
            let min = *durations.iter().min().unwrap();
            let max = *durations.iter().max().unwrap();

            report.push_str(&format!(
                "{}: {} calls\n  Total: {:?}\n  Average: {:?}\n  Min: {:?}\n  Max: {:?}\n\n",
                operation,
                durations.len(),
                total,
                average,
                min,
                max
            ));
        }

        report
    }

    /// Clear all timings
    pub fn clear(&mut self) {
        self.timings.clear();
        self.start_times.clear();
    }
}

impl Default for PerformanceProfiler {
    fn default() -> Self {
        Self::new()
    }
}
99 | ||
/// Memory usage tracker
#[derive(Debug, Clone)]
pub struct MemoryTracker {
    // High-water mark of tracked allocations, in bytes.
    peak_memory: usize,
    // Currently tracked allocation total, in bytes.
    current_memory: usize,
}

impl MemoryTracker {
    pub fn new() -> Self {
        Self {
            peak_memory: 0,
            current_memory: 0,
        }
    }

    /// Track memory allocation
    ///
    /// Uses saturating arithmetic so a bookkeeping error cannot trigger an
    /// overflow panic in debug builds (mirrors `deallocate`).
    pub fn allocate(&mut self, size: usize) {
        self.current_memory = self.current_memory.saturating_add(size);
        if self.current_memory > self.peak_memory {
            self.peak_memory = self.current_memory;
        }
    }

    /// Track memory deallocation
    pub fn deallocate(&mut self, size: usize) {
        self.current_memory = self.current_memory.saturating_sub(size);
    }

    /// Get current memory usage
    pub fn current_usage(&self) -> usize {
        self.current_memory
    }

    /// Get peak memory usage
    pub fn peak_usage(&self) -> usize {
        self.peak_memory
    }

    /// Reset tracking
    pub fn reset(&mut self) {
        self.current_memory = 0;
        self.peak_memory = 0;
    }
}

impl Default for MemoryTracker {
    fn default() -> Self {
        Self::new()
    }
}
150 | ||
/// Input stats for performance calculation
#[derive(Debug, Clone)]
pub struct ScanStats {
    pub scan_duration: Duration,
    pub total_files: usize,
    pub total_lines: usize,
    pub total_matches: usize,
    pub cache_hits: usize,
    pub cache_total: usize,
    pub memory_usage_bytes: usize,
    pub thread_count: usize,
}

/// Comprehensive performance metrics
#[derive(Debug, Clone)]
pub struct PerformanceMetrics {
    pub scan_duration: Duration,
    pub files_per_second: f64,
    pub lines_per_second: f64,
    pub matches_per_second: f64,
    pub cache_hit_rate: f64,
    pub memory_usage_mb: f64,
    pub parallelism_efficiency: f64,
}

impl PerformanceMetrics {
    /// Derives throughput and efficiency figures from raw scan stats.
    pub fn calculate(stats: ScanStats) -> Self {
        let duration_secs = stats.scan_duration.as_secs_f64();

        // Throughput helper; guards against a zero-length scan.
        let per_second = |count: usize| -> f64 {
            if duration_secs > 0.0 {
                count as f64 / duration_secs
            } else {
                0.0
            }
        };

        let files_per_second = per_second(stats.total_files);
        let lines_per_second = per_second(stats.total_lines);
        let matches_per_second = per_second(stats.total_matches);

        let cache_hit_rate = if stats.cache_total > 0 {
            stats.cache_hits as f64 / stats.cache_total as f64
        } else {
            0.0
        };

        let memory_usage_mb = stats.memory_usage_bytes as f64 / (1024.0 * 1024.0);

        // Simple parallelism efficiency metric.
        // NOTE(review): with no serial baseline this reduces to
        // 1/thread_count — confirm whether a real baseline should be used.
        let ideal_duration = duration_secs * stats.thread_count as f64;
        let parallelism_efficiency = if ideal_duration > 0.0 {
            (duration_secs / ideal_duration).min(1.0)
        } else {
            0.0
        };

        Self {
            scan_duration: stats.scan_duration,
            files_per_second,
            lines_per_second,
            matches_per_second,
            cache_hit_rate,
            memory_usage_mb,
            parallelism_efficiency,
        }
    }

    /// Renders the metrics as a human-readable multi-line report.
    pub fn report(&self) -> String {
        format!(
            "Performance Metrics:\n\
             ===================\n\
             Scan Duration: {:?}\n\
             Files/sec: {:.2}\n\
             Lines/sec: {:.2}\n\
             Matches/sec: {:.2}\n\
             Cache Hit Rate: {:.2}%\n\
             Memory Usage: {:.2} MB\n\
             Parallelism Efficiency: {:.2}%\n",
            self.scan_duration,
            self.files_per_second,
            self.lines_per_second,
            self.matches_per_second,
            self.cache_hit_rate * 100.0,
            self.memory_usage_mb,
            self.parallelism_efficiency * 100.0
        )
    }
}
246 | ||
#[cfg(test)]
mod tests {
    use super::*;
    use std::thread;

    #[test]
    fn test_performance_profiler() {
        let mut profiler = PerformanceProfiler::new();

        profiler.start("test_operation");
        thread::sleep(Duration::from_millis(10));
        profiler.end("test_operation");

        assert!(profiler.average_duration("test_operation").is_some());
        assert_eq!(profiler.operation_count("test_operation"), 1);
    }

    #[test]
    fn test_memory_tracker() {
        let mut tracker = MemoryTracker::new();

        tracker.allocate(1024);
        assert_eq!((tracker.current_usage(), tracker.peak_usage()), (1024, 1024));

        tracker.allocate(512);
        assert_eq!((tracker.current_usage(), tracker.peak_usage()), (1536, 1536));

        tracker.deallocate(1024);
        // Peak must survive deallocation.
        assert_eq!((tracker.current_usage(), tracker.peak_usage()), (512, 1536));
    }

    #[test]
    fn test_performance_metrics() {
        let stats = ScanStats {
            scan_duration: Duration::from_secs(2),
            total_files: 100,
            total_lines: 10_000,
            total_matches: 50,
            cache_hits: 80,
            cache_total: 100,
            memory_usage_bytes: 1024 * 1024,
            thread_count: 4,
        };

        let metrics = PerformanceMetrics::calculate(stats);

        assert_eq!(metrics.files_per_second, 50.0);
        assert_eq!(metrics.lines_per_second, 5000.0);
        assert_eq!(metrics.matches_per_second, 25.0);
        assert_eq!(metrics.cache_hit_rate, 0.8);
        assert_eq!(metrics.memory_usage_mb, 1.0);
    }
}
Line | Count | Source |
1 | use super::Formatter; | |
2 | use code_guardian_core::Match; | |
3 | ||
4 | /// Formatter that outputs matches in CSV format. | |
5 | /// Includes headers for spreadsheet compatibility. | |
6 | pub struct CsvFormatter; | |
7 | ||
8 | impl Formatter for CsvFormatter { | |
9 | 1 | fn format(&self, matches: &[Match]) -> String { |
10 | 1 | let mut wtr = csv::Writer::from_writer(vec![]); |
11 | 1 | wtr.write_record(["file_path", "line_number", "column", "pattern", "message"]) |
12 | 1 | .unwrap(); |
13 | ||
14 | 2 | for |
15 | 1 | wtr.write_record([ |
16 | 1 | &m.file_path, |
17 | 1 | &m.line_number.to_string(), |
18 | 1 | &m.column.to_string(), |
19 | 1 | &m.pattern, |
20 | 1 | &m.message, |
21 | 1 | ]) |
22 | 1 | .unwrap(); |
23 | 1 | } |
24 | ||
25 | 1 | wtr.flush().unwrap(); |
26 | 1 | String::from_utf8(wtr.into_inner().unwrap()).unwrap() |
27 | 1 | } |
28 | } | |
29 | ||
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a `Match` fixture with the given fields.
    fn make_match(path: &str, line: usize, col: usize, pattern: &str, message: &str) -> Match {
        Match {
            file_path: path.to_string(),
            line_number: line,
            column: col,
            pattern: pattern.to_string(),
            message: message.to_string(),
        }
    }

    #[test]
    fn test_empty_matches() {
        let output = CsvFormatter.format(&[]);
        let lines: Vec<&str> = output.lines().collect();
        assert_eq!(lines.len(), 1); // Only header
        assert!(lines[0].contains("file_path,line_number,column,pattern,message"));
    }

    #[test]
    fn test_single_match() {
        let matches = vec![make_match("test.rs", 1, 1, "TODO", "TODO: fix this")];
        let output = CsvFormatter.format(&matches);
        let lines: Vec<&str> = output.lines().collect();
        assert_eq!(lines.len(), 2);
        assert!(lines[1].contains("test.rs,1,1,TODO,TODO: fix this"));
    }

    #[test]
    fn test_multiple_matches() {
        let matches = vec![
            make_match("test.rs", 1, 1, "TODO", "TODO"),
            make_match("test.js", 2, 3, "FIXME", "FIXME"),
        ];
        let output = CsvFormatter.format(&matches);
        let lines: Vec<&str> = output.lines().collect();
        assert_eq!(lines.len(), 3);
        assert!(lines[1].contains("test.rs"));
        assert!(lines[2].contains("test.js"));
    }

    #[test]
    fn test_csv_escaping() {
        // Fields containing commas must come back double-quoted.
        let matches = vec![make_match("test,file.rs", 1, 1, "TODO", "TODO, with comma")];
        let output = CsvFormatter.format(&matches);
        let lines: Vec<&str> = output.lines().collect();
        assert!(lines[1].contains("\"test,file.rs\""));
        assert!(lines[1].contains("\"TODO, with comma\""));
    }
}
102 | ||
#[cfg(test)]
mod proptest_tests {
    use super::*;
    use proptest::prelude::*;

    /// Strategy producing arbitrary `Match` values with sane field shapes.
    fn arb_match() -> impl Strategy<Value = Match> {
        (
            "[a-zA-Z0-9_.]+",
            1..10000usize,
            1..10000usize,
            "[A-Z]+",
            ".*",
        )
            .prop_map(|(file_path, line_number, column, pattern, message)| Match {
                file_path,
                line_number,
                column,
                pattern,
                message,
            })
    }

    proptest! {
        #[test]
        fn test_csv_formatter_arbitrary_matches(matches in proptest::collection::vec(arb_match(), 0..10)) {
            let output = CsvFormatter.format(&matches);
            // Round-trip through a CSV reader to check structural validity.
            let mut reader = csv::Reader::from_reader(output.as_bytes());
            let records: Vec<_> = reader.records().collect();
            prop_assert_eq!(records.len(), matches.len());
            for (expected, record) in matches.iter().zip(records) {
                let record = record.unwrap();
                prop_assert_eq!(record.len(), 5);
                prop_assert_eq!(record[0].to_string(), expected.file_path.clone());
                prop_assert_eq!(record[1].to_string(), expected.line_number.to_string());
                prop_assert_eq!(record[2].to_string(), expected.column.to_string());
                prop_assert_eq!(record[3].to_string(), expected.pattern.clone());
                prop_assert_eq!(record[4].to_string(), expected.message.clone());
            }
        }
    }
}
145 | } |
Line | Count | Source |
1 | use super::Formatter; | |
2 | use code_guardian_core::Match; | |
3 | ||
4 | /// Formatter that outputs matches in HTML table format. | |
5 | /// Includes basic HTML structure for standalone display. | |
6 | pub struct HtmlFormatter; | |
7 | ||
8 | impl Formatter for HtmlFormatter { | |
9 | 1 | fn format(&self, matches: &[Match]) -> String { |
10 | 1 | let mut output = String::from( |
11 | r#"<!DOCTYPE html> | |
12 | <html> | |
13 | <head> | |
14 | <title>Code Guardian Matches</title> | |
15 | <style> | |
16 | table { border-collapse: collapse; width: 100%; } | |
17 | th, td { border: 1px solid #ddd; padding: 8px; text-align: left; } | |
18 | th { background-color: #f2f2f2; } | |
19 | tr:nth-child(even) { background-color: #f9f9f9; } | |
20 | </style> | |
21 | </head> | |
22 | <body> | |
23 | <h1>Code Guardian Scan Results</h1> | |
24 | <table> | |
25 | <thead> | |
26 | <tr> | |
27 | <th>File</th> | |
28 | <th>Line</th> | |
29 | <th>Column</th> | |
30 | <th>Pattern</th> | |
31 | <th>Message</th> | |
32 | </tr> | |
33 | </thead> | |
34 | <tbody> | |
35 | "#, | |
36 | ); | |
37 | ||
38 | 1 | if matches.is_empty() { |
39 | 0 | output.push_str(" <tr><td colspan=\"5\">No matches found.</td></tr>\n"); |
40 | 0 | } else { |
41 | 2 | for |
42 | 1 | output.push_str(&format!( |
43 | 1 | " <tr>\n <td>{}</td>\n <td>{}</td>\n <td>{}</td>\n <td>{}</td>\n <td>{}</td>\n </tr>\n", |
44 | 1 | html_escape(&m.file_path), |
45 | 1 | m.line_number, |
46 | 1 | m.column, |
47 | 1 | html_escape(&m.pattern), |
48 | 1 | html_escape(&m.message) |
49 | 1 | )); |
50 | 1 | } |
51 | } | |
52 | ||
53 | 1 | output.push_str( |
54 | 1 | r#" </tbody> |
55 | 1 | </table> |
56 | 1 | </body> |
57 | 1 | </html> |
58 | 1 | "#, |
59 | ); | |
60 | ||
61 | 1 | output |
62 | 1 | } |
63 | } | |
64 | ||
/// Escapes HTML special characters.
///
/// `&` must be replaced first so already-inserted entities are not
/// double-escaped. `'` is encoded as `&#x27;` because `&apos;` is not
/// defined in HTML 4.
fn html_escape(text: &str) -> String {
    text.replace('&', "&amp;")
        .replace('<', "&lt;")
        .replace('>', "&gt;")
        .replace('"', "&quot;")
        .replace('\'', "&#x27;")
}
73 | ||
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_empty_matches() {
        let formatter = HtmlFormatter;
        let matches = vec![];
        let output = formatter.format(&matches);
        assert!(output.contains("<table>"));
        assert!(output.contains("No matches found."));
        assert!(output.contains("</html>"));
    }

    #[test]
    fn test_single_match() {
        let formatter = HtmlFormatter;
        let matches = vec![Match {
            file_path: "test.rs".to_string(),
            line_number: 1,
            column: 1,
            pattern: "TODO".to_string(),
            message: "TODO: fix this".to_string(),
        }];
        let output = formatter.format(&matches);
        assert!(output.contains("<table>"));
        assert!(output.contains("<td>test.rs</td>"));
        assert!(output.contains("<td>1</td>"));
        assert!(output.contains("<td>TODO</td>"));
        assert!(output.contains("<td>TODO: fix this</td>"));
        assert!(output.contains("</html>"));
    }

    #[test]
    fn test_html_escape() {
        let formatter = HtmlFormatter;
        let matches = vec![Match {
            file_path: "test&<>\"'.rs".to_string(),
            line_number: 1,
            column: 1,
            pattern: "TODO".to_string(),
            message: "TODO&<>\"'".to_string(),
        }];
        let output = formatter.format(&matches);
        // Every special character must come back entity-encoded.
        assert!(output.contains("test&amp;&lt;&gt;&quot;&#x27;.rs"));
        assert!(output.contains("TODO&amp;&lt;&gt;&quot;&#x27;"));
    }

    #[test]
    fn test_multiple_matches() {
        let formatter = HtmlFormatter;
        let matches = vec![
            Match {
                file_path: "test.rs".to_string(),
                line_number: 1,
                column: 1,
                pattern: "TODO".to_string(),
                message: "TODO".to_string(),
            },
            Match {
                file_path: "test.js".to_string(),
                line_number: 2,
                column: 3,
                pattern: "FIXME".to_string(),
                message: "FIXME".to_string(),
            },
        ];
        let output = formatter.format(&matches);
        assert!(output.contains("test.rs"));
        assert!(output.contains("test.js"));
        assert!(output.contains("TODO"));
        assert!(output.contains("FIXME"));
    }
}
148 | ||
#[cfg(test)]
mod proptest_tests {
    use super::*;
    use proptest::prelude::*;

    /// Strategy producing arbitrary `Match` values with sane field shapes.
    fn arb_match() -> impl Strategy<Value = Match> {
        (
            "[a-zA-Z0-9_.]+",
            1..10000usize,
            1..10000usize,
            "[A-Z]+",
            ".*",
        )
            .prop_map(|(file_path, line_number, column, pattern, message)| Match {
                file_path,
                line_number,
                column,
                pattern,
                message,
            })
    }

    proptest! {
        #[test]
        fn test_html_formatter_arbitrary_matches(matches in proptest::collection::vec(arb_match(), 0..10)) {
            let output = HtmlFormatter.format(&matches);
            prop_assert!(output.contains("<html>"));
            prop_assert!(output.contains("</html>"));
            if matches.is_empty() {
                prop_assert!(output.contains("No matches found."));
            } else {
                prop_assert!(output.contains("<table>"));
            }
        }
    }
}
Line | Count | Source |
1 | use super::Formatter; | |
2 | use code_guardian_core::Match; | |
3 | ||
4 | /// Formatter that outputs matches in JSON format. | |
5 | /// Uses pretty-printed JSON for readability. | |
6 | pub struct JsonFormatter; | |
7 | ||
8 | impl Formatter for JsonFormatter { | |
9 | 4 | fn format(&self, matches: &[Match]) -> String { |
10 | 4 | serde_json::to_string_pretty(matches).unwrap_or_else(|_| |
11 | 4 | } |
12 | } | |
13 | ||
#[cfg(test)]
mod tests {
    use super::*;
    use code_guardian_core::Match;

    /// Convenience constructor for a `Match`.
    fn mk(fp: &str, line: usize, col: usize, pat: &str, msg: &str) -> Match {
        Match {
            file_path: fp.to_string(),
            line_number: line,
            column: col,
            pattern: pat.to_string(),
            message: msg.to_string(),
        }
    }

    #[test]
    fn test_empty_matches() {
        // An empty slice serializes to an empty JSON array.
        assert_eq!(JsonFormatter.format(&[]), "[]");
    }

    #[test]
    fn test_single_match() {
        let output = JsonFormatter.format(&[mk("test.rs", 1, 1, "TODO", "TODO: fix this")]);
        let expected = r#"[
  {
    "file_path": "test.rs",
    "line_number": 1,
    "column": 1,
    "pattern": "TODO",
    "message": "TODO: fix this"
  }
]"#;
        assert_eq!(output, expected);
    }

    #[test]
    fn test_multiple_matches() {
        let input = vec![
            mk("test.rs", 1, 1, "TODO", "TODO"),
            mk("test.js", 2, 3, "FIXME", "FIXME"),
        ];
        // The output must be valid JSON that round-trips to the input.
        let parsed: Vec<Match> = serde_json::from_str(&JsonFormatter.format(&input)).unwrap();
        assert_eq!(parsed, input);
    }
}
75 | ||
#[cfg(test)]
mod proptest_tests {
    use super::*;
    use proptest::prelude::*;

    /// Strategy producing an arbitrary `Match` with printable fields.
    fn arb_match() -> impl Strategy<Value = Match> {
        ("[a-zA-Z0-9_.]+", 1..10000usize, 1..10000usize, "[A-Z]+", ".*").prop_map(
            |(file_path, line_number, column, pattern, message)| Match {
                file_path: file_path.to_string(),
                line_number,
                column,
                pattern: pattern.to_string(),
                message: message.to_string(),
            },
        )
    }

    proptest! {
        #[test]
        fn test_json_formatter_arbitrary_matches(matches in proptest::collection::vec(arb_match(), 0..10)) {
            let rendered = JsonFormatter.format(&matches);
            // The output must be valid JSON that round-trips to the input.
            let parsed: Vec<Match> = serde_json::from_str(&rendered).unwrap();
            prop_assert_eq!(parsed, matches);
        }
    }
}
Line | Count | Source |
1 | use super::Formatter; | |
2 | use code_guardian_core::Match; | |
3 | ||
4 | /// Formatter that outputs matches in Markdown table format. | |
5 | /// Suitable for documentation or GitHub issues. | |
6 | pub struct MarkdownFormatter; | |
7 | ||
8 | impl Formatter for MarkdownFormatter { | |
9 | 1 | fn format(&self, matches: &[Match]) -> String { |
10 | 1 | if matches.is_empty() { |
11 | 0 | return "No matches found.".to_string(); |
12 | 1 | } |
13 | ||
14 | 1 | let mut output = String::from("| File | Line | Column | Pattern | Message |\n"); |
15 | 1 | output.push_str("|------|------|--------|---------|---------|\n"); |
16 | ||
17 | 2 | for |
18 | 1 | output.push_str(&format!( |
19 | 1 | "| {} | {} | {} | {} | {} |\n", |
20 | 1 | escape_md(&m.file_path), |
21 | 1 | m.line_number, |
22 | 1 | m.column, |
23 | 1 | escape_md(&m.pattern), |
24 | 1 | escape_md(&m.message) |
25 | 1 | )); |
26 | 1 | } |
27 | ||
28 | 1 | output |
29 | 1 | } |
30 | } | |
31 | ||
/// Escapes pipe characters so a value cannot break out of its table cell.
fn escape_md(text: &str) -> String {
    let mut escaped = String::with_capacity(text.len());
    for ch in text.chars() {
        if ch == '|' {
            escaped.push('\\');
        }
        escaped.push(ch);
    }
    escaped
}
36 | ||
#[cfg(test)]
mod tests {
    use super::*;

    /// Convenience constructor for a `Match`.
    fn mk(fp: &str, line: usize, col: usize, pat: &str, msg: &str) -> Match {
        Match {
            file_path: fp.to_string(),
            line_number: line,
            column: col,
            pattern: pat.to_string(),
            message: msg.to_string(),
        }
    }

    #[test]
    fn test_empty_matches() {
        assert_eq!(MarkdownFormatter.format(&[]), "No matches found.");
    }

    #[test]
    fn test_single_match() {
        let output = MarkdownFormatter.format(&[mk("test.rs", 1, 1, "TODO", "TODO: fix this")]);
        // Header separator and data row must both be present.
        assert!(output.contains("| test.rs | 1 | 1 | TODO | TODO: fix this |"));
        assert!(output.contains("|------|------|--------|---------|---------|"));
    }

    #[test]
    fn test_escape_pipes() {
        let output = MarkdownFormatter.format(&[mk("test|file.rs", 1, 1, "TODO", "TODO|fix")]);
        // Pipes inside cell values must be backslash-escaped.
        assert!(output.contains("test\\|file.rs"));
        assert!(output.contains("TODO\\|fix"));
    }

    #[test]
    fn test_multiple_matches() {
        let input = vec![
            mk("test.rs", 1, 1, "TODO", "TODO"),
            mk("test.js", 2, 3, "FIXME", "FIXME"),
        ];
        let output = MarkdownFormatter.format(&input);
        for needle in ["test.rs", "test.js", "TODO", "FIXME"] {
            assert!(output.contains(needle));
        }
    }
}
105 | ||
#[cfg(test)]
mod proptest_tests {
    use super::*;
    use proptest::prelude::*;

    /// Strategy producing an arbitrary `Match` with printable fields.
    fn arb_match() -> impl Strategy<Value = Match> {
        ("[a-zA-Z0-9_.]+", 1..10000usize, 1..10000usize, "[A-Z]+", ".*").prop_map(
            |(file_path, line_number, column, pattern, message)| Match {
                file_path: file_path.to_string(),
                line_number,
                column,
                pattern: pattern.to_string(),
                message: message.to_string(),
            },
        )
    }

    proptest! {
        #[test]
        fn test_markdown_formatter_arbitrary_matches(matches in proptest::collection::vec(arb_match(), 0..10)) {
            let output = MarkdownFormatter.format(&matches);
            if matches.is_empty() {
                prop_assert_eq!(output, "No matches found.");
            } else {
                // Non-empty input always yields a table with a header row.
                prop_assert!(output.contains("|"));
                prop_assert!(output.contains("File"));
            }
        }
    }
}
Line | Count | Source |
1 | use super::Formatter; | |
2 | use code_guardian_core::Match; | |
3 | ||
4 | /// Formatter that outputs matches in a simple text format. | |
5 | /// Each match is displayed as "file:line:column: pattern - message". | |
6 | pub struct TextFormatter; | |
7 | ||
8 | impl Formatter for TextFormatter { | |
9 | 24 | fn format(&self, matches: &[Match]) -> String { |
10 | 24 | if matches.is_empty() { |
11 | 2 | return "No matches found.".to_string(); |
12 | 22 | } |
13 | ||
14 | 22 | let mut output = String::new(); |
15 | 152 | for |
16 | 130 | output.push_str(&format!( |
17 | 130 | "{}:{}:{}: {} - {}\n", |
18 | 130 | m.file_path, m.line_number, m.column, m.pattern, m.message |
19 | 130 | )); |
20 | 130 | } |
21 | 22 | output.trim_end().to_string() |
22 | 24 | } |
23 | } | |
24 | ||
#[cfg(test)]
mod tests {
    use super::*;

    /// Convenience constructor for a `Match`.
    fn mk(fp: &str, line: usize, col: usize, pat: &str, msg: &str) -> Match {
        Match {
            file_path: fp.to_string(),
            line_number: line,
            column: col,
            pattern: pat.to_string(),
            message: msg.to_string(),
        }
    }

    #[test]
    fn test_empty_matches() {
        assert_eq!(TextFormatter.format(&[]), "No matches found.");
    }

    #[test]
    fn test_single_match() {
        let output = TextFormatter.format(&[mk("test.rs", 1, 1, "TODO", "TODO comment")]);
        assert_eq!(output, "test.rs:1:1: TODO - TODO comment");
    }

    #[test]
    fn test_multiple_matches_snapshot() {
        let input = vec![
            mk("src/main.rs", 10, 5, "TODO", "Found a TODO"),
            mk("src/lib.rs", 10, 1, "FIXME", "FIXME: temporary workaround"),
        ];
        assert_eq!(
            TextFormatter.format(&input),
            "src/main.rs:10:5: TODO - Found a TODO\nsrc/lib.rs:10:1: FIXME - FIXME: temporary workaround"
        );
    }

    #[test]
    fn test_multiple_matches() {
        let input = vec![
            mk("test.rs", 1, 1, "TODO", "TODO"),
            mk("test.js", 2, 3, "FIXME", "FIXME"),
        ];
        assert_eq!(
            TextFormatter.format(&input),
            "test.rs:1:1: TODO - TODO\ntest.js:2:3: FIXME - FIXME"
        );
    }
}
100 | ||
#[cfg(test)]
mod proptest_tests {
    use super::*;
    use proptest::prelude::*;

    /// Strategy producing an arbitrary `Match` with printable fields.
    fn arb_match() -> impl Strategy<Value = Match> {
        ("[a-zA-Z0-9_.]+", 1..10000usize, 1..10000usize, "[A-Z]+", ".*").prop_map(
            |(file_path, line_number, column, pattern, message)| Match {
                file_path: file_path.to_string(),
                line_number,
                column,
                pattern: pattern.to_string(),
                message: message.to_string(),
            },
        )
    }

    proptest! {
        #[test]
        fn test_text_formatter_arbitrary_matches(matches in proptest::collection::vec(arb_match(), 0..10)) {
            let output = TextFormatter.format(&matches);
            // Formatting must never panic; empty input has a fixed message,
            // non-empty input produces non-empty output.
            if matches.is_empty() {
                prop_assert_eq!(output, "No matches found.");
            } else {
                prop_assert!(!output.is_empty());
            }
        }
    }
}
Line | Count | Source |
1 | use anyhow::Result; | |
2 | use code_guardian_core::Match; | |
3 | use rusqlite::{Connection, OptionalExtension}; | |
4 | use serde::{Deserialize, Serialize}; | |
5 | use std::path::Path; | |
6 | ||
7 | refinery::embed_migrations!("migrations"); | |
8 | ||
9 | /// Represents a scan session with its metadata and results. | |
10 | #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] | |
11 | pub struct Scan { | |
12 | /// Unique identifier for the scan. None if not yet saved. | |
13 | pub id: Option<i64>, | |
14 | /// Timestamp when the scan was performed (Unix timestamp). | |
15 | pub timestamp: i64, | |
16 | /// Root path of the scanned directory. | |
17 | pub root_path: String, | |
18 | /// List of matches found during the scan. | |
19 | pub matches: Vec<Match>, | |
20 | } | |
21 | ||
22 | /// Repository trait for scan data access. | |
23 | pub trait ScanRepository { | |
24 | /// Saves a new scan and returns its ID. | |
25 | fn save_scan(&mut self, scan: &Scan) -> Result<i64>; | |
26 | /// Retrieves a scan by ID, including its matches. | |
27 | fn get_scan(&self, id: i64) -> Result<Option<Scan>>; | |
28 | /// Retrieves all scans, without matches for performance. | |
29 | fn get_all_scans(&self) -> Result<Vec<Scan>>; | |
30 | /// Deletes a scan and its matches. | |
31 | fn delete_scan(&mut self, id: i64) -> Result<()>; | |
32 | } | |
33 | ||
34 | /// SQLite implementation of the scan repository. | |
35 | pub struct SqliteScanRepository { | |
36 | conn: Connection, | |
37 | } | |
38 | ||
39 | impl SqliteScanRepository { | |
40 | /// Creates a new repository with an in-memory database for testing. | |
41 | 0 | pub fn new_in_memory() -> Result<Self> { |
42 | 0 | let mut conn = Connection::open_in_memory()?; |
43 | 0 | Self::init_db(&mut conn)?; |
44 | 0 | Ok(Self { conn }) |
45 | 0 | } |
46 | ||
47 | /// Creates a new repository with a file-based database. | |
48 | 41 | pub fn new<P: AsRef<Path>>(path: P) -> Result<Self> { |
49 | 41 | let |
50 | 40 | Self::init_db(&mut conn) |
51 | 40 | Ok(Self { conn }) |
52 | 41 | } |
53 | ||
54 | /// Initializes the database schema using migrations. | |
55 | 40 | fn init_db(conn: &mut Connection) -> Result<()> { |
56 | 40 | migrations::runner().run(conn) |
57 | 40 | Ok(()) |
58 | 40 | } |
59 | } | |
60 | ||
61 | impl ScanRepository for SqliteScanRepository { | |
62 | 29 | fn save_scan(&mut self, scan: &Scan) -> Result<i64> { |
63 | 29 | let tx = self.conn.transaction() |
64 | 29 | tx.execute( |
65 | 29 | "INSERT INTO scans (timestamp, root_path) VALUES (?1, ?2)", |
66 | 29 | (scan.timestamp, &scan.root_path), |
67 | 29 | ) |
68 | 29 | let scan_id = tx.last_insert_rowid(); |
69 | 164 | for |
70 | 135 | tx.execute( |
71 | 135 | "INSERT INTO matches (scan_id, file_path, line_number, column, pattern, message) VALUES (?1, ?2, ?3, ?4, ?5, ?6)", |
72 | 135 | (scan_id, &m.file_path, m.line_number as i64, m.column as i64, &m.pattern, &m.message), |
73 | 135 | ) |
74 | } | |
75 | 29 | tx.commit() |
76 | 29 | Ok(scan_id) |
77 | 29 | } |
78 | ||
79 | 14 | fn get_scan(&self, id: i64) -> Result<Option<Scan>> { |
80 | 14 | let mut stmt = self |
81 | 14 | .conn |
82 | 14 | .prepare("SELECT id, timestamp, root_path FROM scans WHERE id = ?1") |
83 | 14 | let scan_opt = stmt |
84 | 14 | .query_row([id], |row| |
85 | Ok(Scan { | |
86 | 13 | id: Some(row.get(0) |
87 | 13 | timestamp: row.get(1) |
88 | 13 | root_path: row.get(2) |
89 | 13 | matches: Vec::new(), |
90 | }) | |
91 | 13 | }) |
92 | 14 | .optional() |
93 | 14 | if let Some( |
94 | 13 | let mut stmt = self.conn.prepare( |
95 | 13 | "SELECT file_path, line_number, column, pattern, message FROM matches WHERE scan_id = ?1", |
96 | 0 | )?; |
97 | 18 | let |
98 | Ok(Match { | |
99 | 18 | file_path: row.get(0) |
100 | 18 | line_number: row.get(1) |
101 | 18 | column: row.get(2) |
102 | 18 | pattern: row.get(3) |
103 | 18 | message: row.get(4) |
104 | }) | |
105 | 18 | }) |
106 | 31 | for |
107 | 18 | scan.matches.push(m |
108 | } | |
109 | 13 | Ok(Some(scan)) |
110 | } else { | |
111 | 1 | Ok(None) |
112 | } | |
113 | 14 | } |
114 | ||
115 | 4 | fn get_all_scans(&self) -> Result<Vec<Scan>> { |
116 | 4 | let mut stmt = self |
117 | 4 | .conn |
118 | 4 | .prepare("SELECT id, timestamp, root_path FROM scans ORDER BY timestamp DESC") |
119 | 4 | let scans_iter = stmt.query_map([], |row| { |
120 | Ok(Scan { | |
121 | 4 | id: Some(row.get(0) |
122 | 4 | timestamp: row.get(1) |
123 | 4 | root_path: row.get(2) |
124 | 4 | matches: Vec::new(), // Not loaded for performance |
125 | }) | |
126 | 4 | }) |
127 | 4 | let mut scans = Vec::new(); |
128 | 8 | for |
129 | 4 | scans.push(scan |
130 | } | |
131 | 4 | Ok(scans) |
132 | 4 | } |
133 | ||
134 | 0 | fn delete_scan(&mut self, id: i64) -> Result<()> { |
135 | 0 | let tx = self.conn.transaction()?; |
136 | 0 | tx.execute("DELETE FROM matches WHERE scan_id = ?1", [id])?; |
137 | 0 | tx.execute("DELETE FROM scans WHERE id = ?1", [id])?; |
138 | 0 | tx.commit()?; |
139 | 0 | Ok(()) |
140 | 0 | } |
141 | } | |
142 | ||
#[cfg(test)]
mod tests {
    use super::*;
    use chrono::Utc;
    use tempfile::TempDir;

    /// Builds a `Match` at line 1 / column 1 with message == pattern.
    fn mk_match(fp: &str, pat: &str) -> Match {
        Match {
            file_path: fp.to_string(),
            line_number: 1,
            column: 1,
            pattern: pat.to_string(),
            message: pat.to_string(),
        }
    }

    /// Builds an unsaved `Scan` with the given timestamp, root, and matches.
    fn mk_scan(ts: i64, root: &str, matches: Vec<Match>) -> Scan {
        Scan {
            id: None,
            timestamp: ts,
            root_path: root.to_string(),
            matches,
        }
    }

    #[test]
    fn test_save_and_get_scan() {
        let mut repo = SqliteScanRepository::new_in_memory().unwrap();
        let now = Utc::now().timestamp();
        let scan = mk_scan(now, "/test/path", vec![mk_match("file.rs", "TODO")]);
        let id = repo.save_scan(&scan).unwrap();

        let retrieved = repo.get_scan(id).unwrap().unwrap();
        assert_eq!(retrieved.id, Some(id));
        assert_eq!(retrieved.timestamp, now);
        assert_eq!(retrieved.root_path, scan.root_path);
        assert_eq!(retrieved.matches.len(), 1);
        assert_eq!(retrieved.matches[0], scan.matches[0]);
    }

    #[test]
    fn test_get_all_scans() {
        let mut repo = SqliteScanRepository::new_in_memory().unwrap();
        let now1 = Utc::now().timestamp();
        let now2 = Utc::now().timestamp();
        repo.save_scan(&mk_scan(now1, "/path1", vec![])).unwrap();
        repo.save_scan(&mk_scan(now2, "/path2", vec![])).unwrap();

        let all = repo.get_all_scans().unwrap();
        assert_eq!(all.len(), 2);
        // Ordered by timestamp desc
        assert_eq!(all[0].timestamp, now2);
        assert_eq!(all[1].timestamp, now1);
    }

    #[test]
    fn test_delete_scan() {
        let mut repo = SqliteScanRepository::new_in_memory().unwrap();
        let scan = mk_scan(
            Utc::now().timestamp(),
            "/test",
            vec![mk_match("f.rs", "FIXME")],
        );
        let id = repo.save_scan(&scan).unwrap();
        repo.delete_scan(id).unwrap();
        assert!(repo.get_scan(id).unwrap().is_none());
    }

    #[test]
    fn test_file_based_repo() {
        let temp_dir = TempDir::new().unwrap();
        let db_path = temp_dir.path().join("test.db");
        {
            // Write through one connection...
            let mut repo = SqliteScanRepository::new(&db_path).unwrap();
            repo.save_scan(&mk_scan(Utc::now().timestamp(), "/file/test", vec![]))
                .unwrap();
        }
        {
            // ...and confirm the data survives reopening the file.
            let repo = SqliteScanRepository::new(&db_path).unwrap();
            assert_eq!(repo.get_all_scans().unwrap().len(), 1);
        }
    }
}
241 | ||
#[cfg(test)]
mod proptest_tests {
    use super::*;
    use chrono::Utc;
    use proptest::prelude::*;

    /// Strategy producing an arbitrary `Match` with printable fields.
    fn arb_match() -> impl Strategy<Value = Match> {
        ("[a-zA-Z0-9_.]+", 1..10000usize, 1..10000usize, "[A-Z]+", ".*").prop_map(
            |(file_path, line_number, column, pattern, message)| Match {
                file_path: file_path.to_string(),
                line_number,
                column,
                pattern: pattern.to_string(),
                message: message.to_string(),
            },
        )
    }

    proptest! {
        #[test]
        fn test_save_get_arbitrary_scan(matches in proptest::collection::vec(arb_match(), 0..10)) {
            let mut repo = SqliteScanRepository::new_in_memory().unwrap();
            let scan = Scan {
                id: None,
                timestamp: Utc::now().timestamp(),
                root_path: "test_path".to_string(),
                matches: matches.clone(),
            };
            let id = repo.save_scan(&scan).unwrap();
            let retrieved = repo.get_scan(id).unwrap().unwrap();
            assert_eq!(retrieved.matches.len(), scan.matches.len());
            // Row order is not guaranteed, so compare as sets.
            use std::collections::HashSet;
            let saved: HashSet<_> = scan.matches.into_iter().collect();
            let loaded: HashSet<_> = retrieved.matches.into_iter().collect();
            prop_assert_eq!(saved, loaded);
        }
    }
}
Click here for information about interpreting this report.
| Filename | Function Coverage | Line Coverage | Region Coverage | Branch Coverage |
cli/src/advanced_handlers.rs | 77.78% (7/9) | 64.00% (144/225) | 55.16% (203/368) | - (0/0) |
cli/src/benchmark.rs | 66.67% (2/3) | 56.52% (65/115) | 63.44% (144/227) | - (0/0) |
cli/src/command_handlers.rs | 60.00% (3/5) | 41.10% (30/73) | 36.15% (47/130) | - (0/0) |
cli/src/comparison_handlers.rs | 100.00% (4/4) | 96.55% (28/29) | 90.00% (54/60) | - (0/0) |
cli/src/git_integration.rs | 33.33% (5/15) | 17.71% (31/175) | 16.54% (44/266) | - (0/0) |
cli/src/main.rs | 100.00% (2/2) | 80.56% (145/180) | 58.96% (79/134) | - (0/0) |
cli/src/production_handlers.rs | 40.74% (11/27) | 17.24% (80/464) | 22.66% (194/856) | - (0/0) |
cli/src/report_handlers.rs | 100.00% (2/2) | 100.00% (21/21) | 95.12% (39/41) | - (0/0) |
cli/src/scan_handlers.rs | 83.33% (5/6) | 87.79% (151/172) | 82.85% (227/274) | - (0/0) |
cli/src/stack_presets.rs | 0.00% (0/1) | 0.00% (0/45) | 0.00% (0/107) | - (0/0) |
cli/src/utils.rs | 100.00% (12/12) | 88.75% (71/80) | 87.22% (116/133) | - (0/0) |
core/src/cache.rs | 0.00% (0/6) | 0.00% (0/18) | 0.00% (0/24) | - (0/0) |
core/src/config.rs | 66.67% (2/3) | 54.76% (23/42) | 67.74% (63/93) | - (0/0) |
core/src/custom_detectors.rs | 74.07% (20/27) | 67.45% (143/212) | 62.18% (222/357) | - (0/0) |
core/src/detector_factory.rs | 41.67% (5/12) | 31.79% (62/195) | 29.22% (64/219) | - (0/0) |
core/src/detectors.rs | 80.00% (28/35) | 59.51% (147/247) | 65.26% (278/426) | - (0/0) |
core/src/distributed.rs | 84.62% (22/26) | 87.32% (179/205) | 80.36% (270/336) | - (0/0) |
core/src/enhanced_config.rs | 0.00% (0/1) | 0.00% (0/108) | 0.00% (0/333) | - (0/0) |
core/src/incremental.rs | 73.33% (11/15) | 78.74% (163/207) | 79.34% (265/334) | - (0/0) |
core/src/lib.rs | 100.00% (4/4) | 97.06% (33/34) | 94.83% (55/58) | - (0/0) |
core/src/llm_detectors.rs | 0.00% (0/29) | 0.00% (0/267) | 0.00% (0/310) | - (0/0) |
core/src/monitoring.rs | 41.18% (7/17) | 42.61% (49/115) | 38.41% (53/138) | - (0/0) |
core/src/optimized_scanner.rs | 48.57% (17/35) | 42.70% (158/370) | 40.81% (282/691) | - (0/0) |
core/src/performance.rs | 0.00% (0/19) | 0.00% (0/135) | 0.00% (0/167) | - (0/0) |
output/src/formatters/csv.rs | 100.00% (1/1) | 100.00% (17/17) | 100.00% (27/27) | - (0/0) |
output/src/formatters/html.rs | 100.00% (2/2) | 93.33% (28/30) | 93.55% (29/31) | - (0/0) |
output/src/formatters/json.rs | 50.00% (1/2) | 75.00% (3/4) | 71.43% (5/7) | - (0/0) |
output/src/formatters/markdown.rs | 100.00% (2/2) | 95.24% (20/21) | 93.10% (27/29) | - (0/0) |
output/src/formatters/text.rs | 100.00% (1/1) | 100.00% (13/13) | 100.00% (16/16) | - (0/0) |
storage/src/lib.rs | 80.00% (8/10) | 82.14% (69/84) | 68.86% (115/167) | - (0/0) |
Totals | 55.26% (184/333) | 47.99% (1873/3903) | 45.89% (2918/6359) | - (0/0) |
Real-time monitoring and performance metrics
Last updated: Loading...